• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *    Steven R. Loomis     7/8/1999      Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "cintltst.h"
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/ucol.h"
28 #include "unicode/utf16.h"
29 #include "cmemory.h"
30 #include "nucnvtst.h"
31 
32 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
33 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
34 #if !UCONFIG_NO_COLLATION
35 static void TestJitterbug981(void);
36 #endif
37 #if !UCONFIG_NO_LEGACY_CONVERSION
38 static void TestJitterbug1293(void);
39 #endif
40 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
41 static void TestConverterTypesAndStarters(void);
42 static void TestAmbiguous(void);
43 static void TestSignatureDetection(void);
44 static void TestUTF7(void);
45 static void TestIMAP(void);
46 static void TestUTF8(void);
47 static void TestCESU8(void);
48 static void TestUTF16(void);
49 static void TestUTF16BE(void);
50 static void TestUTF16LE(void);
51 static void TestUTF32(void);
52 static void TestUTF32BE(void);
53 static void TestUTF32LE(void);
54 static void TestLATIN1(void);
55 
56 #if !UCONFIG_NO_LEGACY_CONVERSION
57 static void TestSBCS(void);
58 static void TestDBCS(void);
59 static void TestMBCS(void);
60 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
61 static void TestICCRunout(void);
62 #endif
63 
64 #ifdef U_ENABLE_GENERIC_ISO_2022
65 static void TestISO_2022(void);
66 #endif
67 
68 static void TestISO_2022_JP(void);
69 static void TestISO_2022_JP_1(void);
70 static void TestISO_2022_JP_2(void);
71 static void TestISO_2022_KR(void);
72 static void TestISO_2022_KR_1(void);
73 static void TestISO_2022_CN(void);
74 #if 0
75    /*
76     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77     */
78 static void TestISO_2022_CN_EXT(void);
79 #endif
80 static void TestJIS(void);
81 static void TestHZ(void);
82 #endif
83 
84 static void TestSCSU(void);
85 
86 #if !UCONFIG_NO_LEGACY_CONVERSION
87 static void TestEBCDIC_STATEFUL(void);
88 static void TestGB18030(void);
89 static void TestLMBCS(void);
90 static void TestJitterbug255(void);
91 static void TestEBCDICUS4XML(void);
92 #if 0
93    /*
94     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95     */
96 static void TestJitterbug915(void);
97 #endif
98 static void TestISCII(void);
99 
100 static void TestCoverageMBCS(void);
101 static void TestJitterbug2346(void);
102 static void TestJitterbug2411(void);
103 static void TestJB5275(void);
104 static void TestJB5275_1(void);
105 static void TestJitterbug6175(void);
106 
107 static void TestIsFixedWidth(void);
108 #endif
109 
110 static void TestInBufSizes(void);
111 
112 static void TestRoundTrippingAllUTF(void);
113 static void TestConv(const uint16_t in[],
114                      int len,
115                      const char* conv,
116                      const char* lang,
117                      char byteArr[],
118                      int byteArrLen);
119 
120 /* open a converter, using test data if it begins with '@' */
121 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
122 
123 
124 #define NEW_MAX_BUFFER 999
125 
126 static int32_t  gInBufferSize = NEW_MAX_BUFFER;
127 static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
128 static char     gNuConvTestName[1024];
129 
/* Smaller of x and y. Both arguments are fully parenthesized so that
   expressions with low-precedence operators compare correctly; note that
   each argument may still be evaluated more than once. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
131 
my_ucnv_open(const char * cnv,UErrorCode * err)132 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
133 {
134   if(cnv && cnv[0] == '@') {
135     return ucnv_openPackage(loadTestData(err), cnv+1, err);
136   } else {
137     return ucnv_open(cnv, err);
138   }
139 }
140 
/* Writes len bytes from a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
149 
/* Writes len UChars from a to the verbose log as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
157 
/* Writes len bytes from a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
166 
/* Writes len UChars from a to stderr as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
175 
176 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)177 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
178 {
179      const char* s0;
180      const char* s=(char*)source;
181      const int32_t *r=results;
182      UErrorCode errorCode=U_ZERO_ERROR;
183      UChar32 c;
184 
185      while(s<limit) {
186         s0=s;
187         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
188         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
189             break; /* no more significant input */
190         } else if(U_FAILURE(errorCode)) {
191             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
192             break;
193         } else if(
194             /* test the expected number of input bytes only if >=0 */
195             (*r>=0 && (int32_t)(s-s0)!=*r) ||
196             c!=*(r+1)
197         ) {
198             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
199                 message, c, (s-s0), *(r+1), *r);
200             break;
201         }
202         r+=2;
203     }
204 }
205 
206 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)207 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
208 {
209      const char* s=(char*)source;
210      UErrorCode errorCode=U_ZERO_ERROR;
211      uint32_t c;
212      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
213      if(errorCode != expected){
214         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
215      }
216      if(c != 0xFFFD && c != 0xffff){
217         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
218      }
219 
220 }
221 
TestInBufSizes(void)222 static void TestInBufSizes(void)
223 {
224   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
225 #if 1
226   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
227   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
228   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
229   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
230   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
231   TestNewConvertWithBufferSizes(1,1);
232   TestNewConvertWithBufferSizes(2,3);
233   TestNewConvertWithBufferSizes(3,2);
234 #endif
235 }
236 
TestOutBufSizes(void)237 static void TestOutBufSizes(void)
238 {
239 #if 1
240   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
241   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
242   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
243   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
244   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
245   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
246 
247 #endif
248 }
249 
250 
/**
 * Registers the converter tests of this file in the test tree rooted at root.
 * Every test is registered under the path "tsconv/nucnvtst/<function name>".
 * Which tests are registered depends on the UCONFIG_NO_* build switches and
 * on U_ENABLE_GENERIC_ISO_2022.
 */
void addTestNewConvert(TestNode** root)
{
/* Registers fn under "tsconv/nucnvtst/" followed by the function's own name. */
#define NUCNVTST_ADD(fn) addTest(root, &fn, "tsconv/nucnvtst/" #fn)

#if !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestInBufSizes);
   NUCNVTST_ADD(TestOutBufSizes);
#endif
   NUCNVTST_ADD(TestConverterTypesAndStarters);
   NUCNVTST_ADD(TestAmbiguous);
   NUCNVTST_ADD(TestSignatureDetection);
   NUCNVTST_ADD(TestUTF7);
   NUCNVTST_ADD(TestIMAP);
   NUCNVTST_ADD(TestUTF8);

   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
   NUCNVTST_ADD(TestCESU8);
   NUCNVTST_ADD(TestUTF16);
   NUCNVTST_ADD(TestUTF16BE);
   NUCNVTST_ADD(TestUTF16LE);
   NUCNVTST_ADD(TestUTF32);
   NUCNVTST_ADD(TestUTF32BE);
   NUCNVTST_ADD(TestUTF32LE);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestLMBCS);
#endif

   NUCNVTST_ADD(TestLATIN1);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestSBCS);
#if !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestDBCS);
   NUCNVTST_ADD(TestICCRunout);
#endif
   NUCNVTST_ADD(TestMBCS);

#ifdef U_ENABLE_GENERIC_ISO_2022
   NUCNVTST_ADD(TestISO_2022);
#endif

   NUCNVTST_ADD(TestISO_2022_JP);
   NUCNVTST_ADD(TestJIS);
   NUCNVTST_ADD(TestISO_2022_JP_1);
   /* android-changed (no have ISO_2022_JP_2): TestISO_2022_JP_2 is not registered */
   NUCNVTST_ADD(TestISO_2022_KR);
   NUCNVTST_ADD(TestISO_2022_KR_1);
   /* android-changed (no ISO-2022-CN): TestISO_2022_CN is not registered */
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7,
    * so TestISO_2022_CN_EXT and TestJitterbug915 are not registered.
    */
   NUCNVTST_ADD(TestHZ);
#endif

   NUCNVTST_ADD(TestSCSU);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestEBCDIC_STATEFUL);
   NUCNVTST_ADD(TestGB18030);
   NUCNVTST_ADD(TestJitterbug255);
   NUCNVTST_ADD(TestEBCDICUS4XML);
   NUCNVTST_ADD(TestISCII);
   NUCNVTST_ADD(TestJB5275);
   NUCNVTST_ADD(TestJB5275_1);
#if !UCONFIG_NO_COLLATION
   NUCNVTST_ADD(TestJitterbug981);
#endif

   NUCNVTST_ADD(TestJitterbug1293);
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestCoverageMBCS);
#endif

   NUCNVTST_ADD(TestRoundTrippingAllUTF);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestJitterbug2346);
   NUCNVTST_ADD(TestJitterbug2411);
   /* android-removed (no full ISO2022 CJK tables): TestJitterbug6175 is not registered */
   NUCNVTST_ADD(TestIsFixedWidth);
#endif

#undef NUCNVTST_ADD
}
337 
338 
339 /* Note that this test already makes use of statics, so it's not really
340    multithread safe.
341    This convenience function lets us make the error messages actually useful.
342 */
343 
setNuConvTestName(const char * codepage,const char * direction)344 static void setNuConvTestName(const char *codepage, const char *direction)
345 {
346     snprintf(gNuConvTestName, sizeof(gNuConvTestName), "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347         codepage,
348         direction,
349         (int)gInBufferSize,
350         (int)gOutBufferSize);
351 }
352 
/* Result of one testConvertFromU() / testConvertToU() run. */
typedef enum
{
  /* test was OK */
  TC_OK       = 0,
  /* Match failed - err was printed */
  TC_MISMATCH = 1,
  /* Test failed, don't print an err because it was already printed. */
  TC_FAIL     = 2
} ETestConvertResult;
359 
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)362 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
363                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364 {
365     UErrorCode status = U_ZERO_ERROR;
366     UConverter *conv = 0;
367     char    junkout[NEW_MAX_BUFFER]; /* FIX */
368     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
369     char *p;
370     const UChar *src;
371     char *end;
372     char *targ;
373     int32_t *offs;
374     int i;
375     int32_t   realBufferSize;
376     char *realBufferEnd;
377     const UChar *realSourceEnd;
378     const UChar *sourceLimit;
379     UBool checkOffsets = true;
380     UBool doFlush;
381 
382     for(i=0;i<NEW_MAX_BUFFER;i++)
383         junkout[i] = (char)0xF0;
384     for(i=0;i<NEW_MAX_BUFFER;i++)
385         junokout[i] = 0xFF;
386 
387     setNuConvTestName(codepage, "FROM");
388 
389     log_verbose("\n=========  %s\n", gNuConvTestName);
390 
391     conv = my_ucnv_open(codepage, &status);
392 
393     if(U_FAILURE(status))
394     {
395         log_data_err("Couldn't open converter %s\n",codepage);
396         return TC_FAIL;
397     }
398     if(useFallback){
399         ucnv_setFallback(conv,useFallback);
400     }
401 
402     log_verbose("Converter opened..\n");
403 
404     src = source;
405     targ = junkout;
406     offs = junokout;
407 
408     realBufferSize = UPRV_LENGTHOF(junkout);
409     realBufferEnd = junkout + realBufferSize;
410     realSourceEnd = source + sourceLen;
411 
412     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413         checkOffsets = false;
414 
415     do
416     {
417       end = nct_min(targ + gOutBufferSize, realBufferEnd);
418       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419 
420       doFlush = (UBool)(sourceLimit == realSourceEnd);
421 
422       if(targ == realBufferEnd) {
423         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424         return TC_FAIL;
425       }
426       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
427 
428 
429       status = U_ZERO_ERROR;
430 
431       ucnv_fromUnicode (conv,
432                         &targ,
433                         end,
434                         &src,
435                         sourceLimit,
436                         checkOffsets ? offs : NULL,
437                         doFlush, /* flush if we're at the end of the input data */
438                         &status);
439     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440 
441     if(U_FAILURE(status)) {
442       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443       return TC_FAIL;
444     }
445 
446     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447                 sourceLen, targ-junkout);
448 
449     if(getTestOption(VERBOSITY_OPTION))
450     {
451       char junk[9999];
452       char offset_str[9999];
453       char *ptr;
454 
455       junk[0] = 0;
456       offset_str[0] = 0;
457       for(ptr = junkout;ptr<targ;ptr++) {
458         snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459         snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460       }
461 
462       log_verbose(junk);
463       printSeq((const uint8_t *)expect, expectLen);
464       if ( checkOffsets ) {
465         log_verbose("\nOffsets:");
466         log_verbose(offset_str);
467       }
468       log_verbose("\n");
469     }
470     ucnv_close(conv);
471 
472     if(expectLen != targ-junkout) {
473       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475       fprintf(stderr, "Got:\n");
476       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477       fprintf(stderr, "Expected:\n");
478       printSeqErr((const unsigned char*)expect, expectLen);
479       return TC_MISMATCH;
480     }
481 
482     if (checkOffsets && (expectOffsets != 0) ) {
483       log_verbose("comparing %d offsets..\n", targ-junkout);
484       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487         log_err("\n");
488         log_err("Got  :     ");
489         for(p=junkout;p<targ;p++) {
490           log_err("%d,", junokout[p-junkout]);
491         }
492         log_err("\n");
493         log_err("Expected:  ");
494         for(i=0; i<(targ-junkout); i++) {
495           log_err("%d,", expectOffsets[i]);
496         }
497         log_err("\n");
498       }
499     }
500 
501     log_verbose("comparing..\n");
502     if(!memcmp(junkout, expect, expectLen)) {
503       log_verbose("Matches!\n");
504       return TC_OK;
505     } else {
506       log_err("String does not match u->%s\n", gNuConvTestName);
507       printUSeqErr(source, sourceLen);
508       fprintf(stderr, "Got:\n");
509       printSeqErr((const unsigned char *)junkout, expectLen);
510       fprintf(stderr, "Expected:\n");
511       printSeqErr((const unsigned char *)expect, expectLen);
512 
513       return TC_MISMATCH;
514     }
515 }
516 
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)519 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521 {
522     UErrorCode status = U_ZERO_ERROR;
523     UConverter *conv = 0;
524     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
525     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
526     const char *src;
527     const char *realSourceEnd;
528     const char *srcLimit;
529     UChar *p;
530     UChar *targ;
531     UChar *end;
532     int32_t *offs;
533     int i;
534     UBool   checkOffsets = true;
535 
536     int32_t   realBufferSize;
537     UChar *realBufferEnd;
538 
539 
540     for(i=0;i<NEW_MAX_BUFFER;i++)
541         junkout[i] = 0xFFFE;
542 
543     for(i=0;i<NEW_MAX_BUFFER;i++)
544         junokout[i] = -1;
545 
546     setNuConvTestName(codepage, "TO");
547 
548     log_verbose("\n=========  %s\n", gNuConvTestName);
549 
550     conv = my_ucnv_open(codepage, &status);
551 
552     if(U_FAILURE(status))
553     {
554         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555         return TC_FAIL;
556     }
557     if(useFallback){
558         ucnv_setFallback(conv,useFallback);
559     }
560     log_verbose("Converter opened..\n");
561 
562     src = (const char *)source;
563     targ = junkout;
564     offs = junokout;
565 
566     realBufferSize = UPRV_LENGTHOF(junkout);
567     realBufferEnd = junkout + realBufferSize;
568     realSourceEnd = src + sourcelen;
569 
570     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
571         checkOffsets = false;
572 
573     do
574     {
575         end = nct_min( targ + gOutBufferSize, realBufferEnd);
576         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577 
578         if(targ == realBufferEnd)
579         {
580             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581             return TC_FAIL;
582         }
583         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584 
585         /* oldTarg = targ; */
586 
587         status = U_ZERO_ERROR;
588 
589         ucnv_toUnicode (conv,
590                 &targ,
591                 end,
592                 &src,
593                 srcLimit,
594                 checkOffsets ? offs : NULL,
595                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
596                 &status);
597 
598         /*        offs += (targ-oldTarg); */
599 
600       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601 
602     if(U_FAILURE(status))
603     {
604         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605         return TC_FAIL;
606     }
607 
608     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609         sourcelen, targ-junkout);
610     if(getTestOption(VERBOSITY_OPTION))
611     {
612         char junk[9999];
613         char offset_str[9999];
614         UChar *ptr;
615 
616         junk[0] = 0;
617         offset_str[0] = 0;
618 
619         for(ptr = junkout;ptr<targ;ptr++)
620         {
621             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623         }
624 
625         log_verbose(junk);
626         printUSeq(expect, expectlen);
627         if ( checkOffsets )
628           {
629             log_verbose("\nOffsets:");
630             log_verbose(offset_str);
631           }
632         log_verbose("\n");
633     }
634     ucnv_close(conv);
635 
636     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637 
638     if (checkOffsets && (expectOffsets != 0))
639     {
640         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642             log_err("Got:      ");
643             for(p=junkout;p<targ;p++) {
644                 log_err("%d,", junokout[p-junkout]);
645             }
646             log_err("\n");
647             log_err("Expected: ");
648             for(i=0; i<(targ-junkout); i++) {
649                 log_err("%d,", expectOffsets[i]);
650             }
651             log_err("\n");
652             log_err("output:   ");
653             for(i=0; i<(targ-junkout); i++) {
654                 log_err("%X,", junkout[i]);
655             }
656             log_err("\n");
657             log_err("input:    ");
658             for(i=0; i<(src-(const char *)source); i++) {
659                 log_err("%X,", (unsigned char)source[i]);
660             }
661             log_err("\n");
662         }
663     }
664 
665     if(!memcmp(junkout, expect, expectlen*2))
666     {
667         log_verbose("Matches!\n");
668         return TC_OK;
669     }
670     else
671     {
672         log_err("String does not match. %s\n", gNuConvTestName);
673         log_verbose("String does not match. %s\n", gNuConvTestName);
674         printf("\nGot:");
675         printUSeqErr(junkout, expectlen);
676         printf("\nExpected:");
677         printUSeqErr(expect, expectlen);
678         return TC_MISMATCH;
679     }
680 }
681 
682 
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)683 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684 {
685 /** test chars #1 */
686     /*  1 2 3  1Han 2Han 3Han .  */
687     static const UChar   sampleText[] =
688      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689     static const UChar sampleTextRoundTripUnmappable[] =
690     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691 
692 
693     static const uint8_t expectedUTF8[] =
694      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695     static const int32_t toUTF8Offs[] =
696      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697     static const int32_t fmUTF8Offs[] =
698      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699 
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701     /* Same as UTF8, but with ^[%B preceding */
702     static const const uint8_t expectedISO2022[] =
703      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704     static const int32_t toISO2022Offs[]     =
705      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707     static const int32_t fmISO2022Offs[] =
708      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709 #endif
710 
711     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712     static const uint8_t expectedIBM930[] =
713      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714     static const int32_t toIBM930Offs[] =
715      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716     static const int32_t fmIBM930Offs[] =
717      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718 
719     /* 1 2 3 0 h1 h2 h3 . MBCS*/
720     static const uint8_t expectedIBM943[] =
721      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722     static const int32_t toIBM943Offs    [] =
723      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724     static const int32_t fmIBM943Offs[] =
725      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726 
727     /* 1 2 3 0 h1 h2 h3 . DBCS*/
728     static const uint8_t expectedIBM9027[] =
729      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730     static const int32_t toIBM9027Offs    [] =
731      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732 
733      /* 1 2 3 0 <?> <?> <?> . SBCS*/
734     static const uint8_t expectedIBM920[] =
735      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736     static const int32_t toIBM920Offs    [] =
737      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738 
739     /* 1 2 3 0 <?> <?> <?> . SBCS*/
740     static const uint8_t expectedISO88593[] =
741      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742     static const int32_t toISO88593Offs[]     =
743      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744 
745     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746     static const uint8_t expectedLATIN1[] =
747      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748     static const int32_t toLATIN1Offs[]     =
749      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750 
751 
752     /*  etc */
753     static const uint8_t expectedUTF16BE[] =
754      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755     static const int32_t toUTF16BEOffs[]=
756      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757     static const int32_t fmUTF16BEOffs[] =
758      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
759 
760     static const uint8_t expectedUTF16LE[] =
761      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762     static const int32_t toUTF16LEOffs[]=
763      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764     static const int32_t fmUTF16LEOffs[] =
765      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766 
767     static const uint8_t expectedUTF32BE[] =
768      { 0x00, 0x00, 0x00, 0x31,
769        0x00, 0x00, 0x00, 0x32,
770        0x00, 0x00, 0x00, 0x33,
771        0x00, 0x00, 0x00, 0x00,
772        0x00, 0x00, 0x4e, 0x00,
773        0x00, 0x00, 0x4e, 0x8c,
774        0x00, 0x00, 0x4e, 0x09,
775        0x00, 0x00, 0x00, 0x2e,
776        0x00, 0x02, 0x00, 0x21 };
777     static const int32_t toUTF32BEOffs[]=
778      { 0x00, 0x00, 0x00, 0x00,
779        0x01, 0x01, 0x01, 0x01,
780        0x02, 0x02, 0x02, 0x02,
781        0x03, 0x03, 0x03, 0x03,
782        0x04, 0x04, 0x04, 0x04,
783        0x05, 0x05, 0x05, 0x05,
784        0x06, 0x06, 0x06, 0x06,
785        0x07, 0x07, 0x07, 0x07,
786        0x08, 0x08, 0x08, 0x08,
787        0x08, 0x08, 0x08, 0x08 };
788     static const int32_t fmUTF32BEOffs[] =
789      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
790 
791     static const uint8_t expectedUTF32LE[] =
792      { 0x31, 0x00, 0x00, 0x00,
793        0x32, 0x00, 0x00, 0x00,
794        0x33, 0x00, 0x00, 0x00,
795        0x00, 0x00, 0x00, 0x00,
796        0x00, 0x4e, 0x00, 0x00,
797        0x8c, 0x4e, 0x00, 0x00,
798        0x09, 0x4e, 0x00, 0x00,
799        0x2e, 0x00, 0x00, 0x00,
800        0x21, 0x00, 0x02, 0x00 };
801     static const int32_t toUTF32LEOffs[]=
802      { 0x00, 0x00, 0x00, 0x00,
803        0x01, 0x01, 0x01, 0x01,
804        0x02, 0x02, 0x02, 0x02,
805        0x03, 0x03, 0x03, 0x03,
806        0x04, 0x04, 0x04, 0x04,
807        0x05, 0x05, 0x05, 0x05,
808        0x06, 0x06, 0x06, 0x06,
809        0x07, 0x07, 0x07, 0x07,
810        0x08, 0x08, 0x08, 0x08,
811        0x08, 0x08, 0x08, 0x08 };
812     static const int32_t fmUTF32LEOffs[] =
813      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814 
815 
816 
817 
818 /** Test chars #2 **/
819 
820     /* Sahha [health],  slashed h's */
821     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823 
824     /* LMBCS */
825     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829     /*********************************** START OF CODE finally *************/
830 
831     gInBufferSize = insize;
832     gOutBufferSize = outsize;
833 
834     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835 
836 
837     /*UTF-8*/
838     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
839         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,false );
840 
841     log_verbose("Test surrogate behaviour for UTF8\n");
842     {
843         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845                            0xf0, 0x90, 0x90, 0x81,
846                            0xef, 0xbf, 0xbd
847         };
848         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849         testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
850                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,false );
851 
852 
853     }
854 
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856     /*ISO-2022*/
857     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
858         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,false );
859 #endif
860 
861     /*UTF16 LE*/
862     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
863         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,false );
864     /*UTF16 BE*/
865     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
866         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,false );
867     /*UTF32 LE*/
868     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
869         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,false );
870     /*UTF32 BE*/
871     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
872         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,false );
873 
874     /*LATIN_1*/
875     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
876         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,false );
877 
878 #if !UCONFIG_NO_LEGACY_CONVERSION
879     /*EBCDIC_STATEFUL*/
880     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
881         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,false );
882 
883     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
884         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
885 
886     /*MBCS*/
887 
888     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
889         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,false );
890     /*DBCS*/
891     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
892         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,false );
893     /*SBCS*/
894     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
895         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,false );
896     /*SBCS*/
897     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
898         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
899 #endif
900 
901 
902 /****/
903 
904     /*UTF-8*/
905     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906         sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,false);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908     /*ISO-2022*/
909     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910         sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,false);
911 #endif
912 
913     /*UTF16 LE*/
914     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
916     /*UTF16 BE*/
917     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918         sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,false);
919     /*UTF32 LE*/
920     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921         sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,false);
922     /*UTF32 BE*/
923     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924         sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,false);
925 
926 #if !UCONFIG_NO_LEGACY_CONVERSION
927     /*EBCDIC_STATEFUL*/
928     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,false);
930     /*MBCS*/
931     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,false);
933 #endif
934 
935     /* Try it again to make sure it still works */
936     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
938 
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941         malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,false);
942 
943     testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
944         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,false );
945 
946     /*LMBCS*/
947     testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
948         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,false );
949     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950         LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,false);
951 #endif
952 
953     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954     {
955         /* encode directly set D and set O */
956         static const uint8_t utf7[] = {
957             /*
958                 Hi Mom -+Jjo--!
959                 A+ImIDkQ.
960                 +-
961                 +ZeVnLIqe-
962             */
963             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965             0x2b, 0x2d,
966             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967         };
968         static const UChar unicode[] = {
969             /*
970                 Hi Mom -<WHITE SMILING FACE>-!
971                 A<NOT IDENTICAL TO><ALPHA>.
972                 +
973                 [Japanese word "nihongo"]
974             */
975             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976             0x41, 0x2262, 0x0391, 0x2e,
977             0x2b,
978             0x65e5, 0x672c, 0x8a9e
979         };
980         static const int32_t toUnicodeOffsets[] = {
981             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982             15, 17, 19, 23,
983             24,
984             27, 29, 32
985         };
986         static const int32_t fromUnicodeOffsets[] = {
987             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988             11, 12, 12, 12, 13, 13, 13, 13, 14,
989             15, 15,
990             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991         };
992 
993         /* same but escaping set O (the exclamation mark) */
994         static const uint8_t utf7Restricted[] = {
995             /*
996                 Hi Mom -+Jjo--+ACE-
997                 A+ImIDkQ.
998                 +-
999                 +ZeVnLIqe-
1000             */
1001             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003             0x2b, 0x2d,
1004             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005         };
1006         static const int32_t toUnicodeOffsetsR[] = {
1007             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008             19, 21, 23, 27,
1009             28,
1010             31, 33, 36
1011         };
1012         static const int32_t fromUnicodeOffsetsR[] = {
1013             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014             11, 12, 12, 12, 13, 13, 13, 13, 14,
1015             15, 15,
1016             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017         };
1018 
1019         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,false);
1020 
1021         testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,false);
1022 
1023         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,false);
1024 
1025         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,false);
1026     }
1027 
1028     /*
1029      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030      * modified according to RFC 2060,
1031      * and supplemented with the one example in RFC 2060 itself.
1032      */
1033     {
1034         static const uint8_t imap[] = {
1035             /*  Hi Mom -&Jjo--!
1036                 A&ImIDkQ-.
1037                 &-
1038                 &ZeVnLIqe-
1039                 \
1040                 ~peter
1041                 /mail
1042                 /&ZeVnLIqe-
1043                 /&U,BTFw-
1044             */
1045             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047             0x26, 0x2d,
1048             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049             0x5c,
1050             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054         };
1055         static const UChar unicode[] = {
1056             /*  Hi Mom -<WHITE SMILING FACE>-!
1057                 A<NOT IDENTICAL TO><ALPHA>.
1058                 &
1059                 [Japanese word "nihongo"]
1060                 \
1061                 ~peter
1062                 /mail
1063                 /<65e5, 672c, 8a9e>
1064                 /<53f0, 5317>
1065             */
1066             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067             0x41, 0x2262, 0x0391, 0x2e,
1068             0x26,
1069             0x65e5, 0x672c, 0x8a9e,
1070             0x5c,
1071             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073             0x2f, 0x65e5, 0x672c, 0x8a9e,
1074             0x2f, 0x53f0, 0x5317
1075         };
1076         static const int32_t toUnicodeOffsets[] = {
1077             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078             15, 17, 19, 24,
1079             25,
1080             28, 30, 33,
1081             37,
1082             38, 39, 40, 41, 42, 43,
1083             44, 45, 46, 47, 48,
1084             49, 51, 53, 56,
1085             60, 62, 64
1086         };
1087         static const int32_t fromUnicodeOffsets[] = {
1088             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090             15, 15,
1091             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092             19,
1093             20, 21, 22, 23, 24, 25,
1094             26, 27, 28, 29, 30,
1095             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096             35, 36, 36, 36, 37, 37, 37, 37, 37
1097         };
1098 
1099         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,false);
1100 
1101         testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,false);
1102     }
1103 
1104     /* Test UTF-8 bad data handling*/
1105     {
1106         static const uint8_t utf8[]={
1107             0x61,
1108             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1109             0x00,
1110             0x62,
1111             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1113             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1114             0xdf, 0xbf,                     /* 7ff */
1115             0xbf,                           /* truncated tail */
1116             0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1117             0x02
1118         };
1119 
1120         static const uint16_t utf8Expected[]={
1121             0x0061,
1122             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1123             0x0000,
1124             0x0062,
1125             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127             0xdbff, 0xdfff,
1128             0x07ff,
1129             0xfffd,
1130             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1131             0x0002
1132         };
1133 
1134         static const int32_t utf8Offsets[]={
1135             0,
1136             1, 2, 3, 4,
1137             5,
1138             6,
1139             7, 8, 9, 10, 11,
1140             12, 13, 14, 15, 16,
1141             17, 17,
1142             21,
1143             23,
1144             24, 25, 26, 27,
1145             28
1146         };
1147         testConvertToU(utf8, sizeof(utf8),
1148                        utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,false);
1149 
1150     }
1151 
1152     /* Test UTF-32BE bad data handling*/
1153     {
1154         static const uint8_t utf32[]={
1155             0x00, 0x00, 0x00, 0x61,
1156             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1157             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1158             0x00, 0x00, 0x00, 0x62,
1159             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1160             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1161             0x00, 0x00, 0x01, 0x62,
1162             0x00, 0x00, 0x02, 0x62
1163         };
1164         static const uint16_t utf32Expected[]={
1165             0x0061,
1166             0xfffd,         /* 0x110000 out of range */
1167             0xDBFF,         /* 0x10FFFF in range */
1168             0xDFFF,
1169             0x0062,
1170             0xfffd,         /* 0xffffffff out of range */
1171             0xfffd,         /* 0x7fffffff out of range */
1172             0x0162,
1173             0x0262
1174         };
1175         static const int32_t utf32Offsets[]={
1176             0, 4, 8, 8, 12, 16, 20, 24, 28
1177         };
1178         static const uint8_t utf32ExpectedBack[]={
1179             0x00, 0x00, 0x00, 0x61,
1180             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1181             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1182             0x00, 0x00, 0x00, 0x62,
1183             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1184             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1185             0x00, 0x00, 0x01, 0x62,
1186             0x00, 0x00, 0x02, 0x62
1187         };
1188         static const int32_t utf32OffsetsBack[]={
1189             0,0,0,0,
1190             1,1,1,1,
1191             2,2,2,2,
1192             4,4,4,4,
1193             5,5,5,5,
1194             6,6,6,6,
1195             7,7,7,7,
1196             8,8,8,8
1197         };
1198 
1199         testConvertToU(utf32, sizeof(utf32),
1200                        utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,false);
1201         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1202             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, false);
1203     }
1204 
1205     /* Test UTF-32LE bad data handling*/
1206     {
1207         static const uint8_t utf32[]={
1208             0x61, 0x00, 0x00, 0x00,
1209             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1210             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1211             0x62, 0x00, 0x00, 0x00,
1212             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1213             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1214             0x62, 0x01, 0x00, 0x00,
1215             0x62, 0x02, 0x00, 0x00,
1216         };
1217 
1218         static const uint16_t utf32Expected[]={
1219             0x0061,
1220             0xfffd,         /* 0x110000 out of range */
1221             0xDBFF,         /* 0x10FFFF in range */
1222             0xDFFF,
1223             0x0062,
1224             0xfffd,         /* 0xffffffff out of range */
1225             0xfffd,         /* 0x7fffffff out of range */
1226             0x0162,
1227             0x0262
1228         };
1229         static const int32_t utf32Offsets[]={
1230             0, 4, 8, 8, 12, 16, 20, 24, 28
1231         };
1232         static const uint8_t utf32ExpectedBack[]={
1233             0x61, 0x00, 0x00, 0x00,
1234             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1235             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1236             0x62, 0x00, 0x00, 0x00,
1237             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1238             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1239             0x62, 0x01, 0x00, 0x00,
1240             0x62, 0x02, 0x00, 0x00
1241         };
1242         static const int32_t utf32OffsetsBack[]={
1243             0,0,0,0,
1244             1,1,1,1,
1245             2,2,2,2,
1246             4,4,4,4,
1247             5,5,5,5,
1248             6,6,6,6,
1249             7,7,7,7,
1250             8,8,8,8
1251         };
1252         testConvertToU(utf32, sizeof(utf32),
1253             utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,false );
1254         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1255             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, false);
1256     }
1257 }
1258 
TestCoverageMBCS(void)1259 static void TestCoverageMBCS(void){
1260 #if 0
1261     UErrorCode status = U_ZERO_ERROR;
1262     const char *directory = loadTestData(&status);
1263     char* tdpath = NULL;
1264     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265     int len = strlen(directory);
1266     char* index=NULL;
1267 
1268     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269     uprv_strcpy(saveDirectory,u_getDataDirectory());
1270     log_verbose("Retrieved data directory %s \n",saveDirectory);
1271     uprv_strcpy(tdpath,directory);
1272     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273 
1274     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275             *(index+1)=0;
1276     }
1277     u_setDataDirectory(tdpath);
1278     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279 #endif
1280 
1281     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1282       which is test file for MBCS conversion with single-byte codepage data.*/
1283     {
1284 
1285         /* MBCS with single byte codepage data test1.ucm*/
1286         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1289 
1290         /*from Unicode*/
1291         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1292             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,false );
1293     }
1294 
1295     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1296       which is test file for MBCS conversion with three-byte codepage data.*/
1297     {
1298 
1299         /* MBCS with three byte codepage data test3.ucm*/
1300         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1302         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303 
1304         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307 
1308         /*from Unicode*/
1309         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1310             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,false );
1311 
1312         /*to Unicode*/
1313         testConvertToU(test3input, sizeof(test3input),
1314             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,false);
1315 
1316     }
1317 
1318     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1319       which is test file for MBCS conversion with four-byte codepage data.*/
1320     {
1321 
1322         /* MBCS with three byte codepage data test4.ucm*/
1323         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1325         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326 
1327         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330 
1331         /*from Unicode*/
1332         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1333             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,false );
1334 
1335         /*to Unicode*/
1336         testConvertToU(test4input, sizeof(test4input),
1337             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,false );
1338 
1339     }
1340 #if 0
1341     free(tdpath);
1342     /* restore the original data directory */
1343     log_verbose("Setting the data directory to %s \n", saveDirectory);
1344     u_setDataDirectory(saveDirectory);
1345     free(saveDirectory);
1346 #endif
1347 
1348 }
1349 
/*
 * Opens the converter called convName via my_ucnv_open() and checks that
 * ucnv_getType() reports convType for it.
 *
 * convName  name passed to my_ucnv_open()
 * convType  the UConverterType the converter is expected to report
 *
 * If the converter cannot be opened, a data error is logged and nothing else
 * is checked. A type mismatch is logged as a test error together with the
 * type value that ucnv_getType() actually returned.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UConverter* myConverter;
    UConverterType actualType;
    UErrorCode err = U_ZERO_ERROR;

    myConverter = my_ucnv_open(convName, &err);

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /*
         * The message says "Got", so print the value the converter reported;
         * printing convType here would only echo the expectation.
         */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
            convName, (unsigned int)actualType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }

    ucnv_close(myConverter);
}
1372 
TestConverterTypesAndStarters(void)1373 static void TestConverterTypesAndStarters(void)
1374 {
1375 #if !UCONFIG_NO_LEGACY_CONVERSION
1376     UConverter* myConverter;
1377     UErrorCode err = U_ZERO_ERROR;
1378     UBool mystarters[256];
1379 
1380 /*    const UBool expectedKSCstarters[256] = {
1381         false, false, false, false, false, false, false, false, false, false,
1382         false, false, false, false, false, false, false, false, false, false,
1383         false, false, false, false, false, false, false, false, false, false,
1384         false, false, false, false, false, false, false, false, false, false,
1385         false, false, false, false, false, false, false, false, false, false,
1386         false, false, false, false, false, false, false, false, false, false,
1387         false, false, false, false, false, false, false, false, false, false,
1388         false, false, false, false, false, false, false, false, false, false,
1389         false, false, false, false, false, false, false, false, false, false,
1390         false, false, false, false, false, false, false, false, false, false,
1391         false, false, false, false, false, false, false, false, false, false,
1392         false, false, false, false, false, false, false, false, false, false,
1393         false, false, false, false, false, false, false, false, false, false,
1394         false, false, false, false, false, false, false, false, false, false,
1395         false, false, false, true, true, true, true, true, true, true,
1396         true, true, true, true, true, true, true, true, true, true,
1397         true, true, true, true, true, true, true, true, true, true,
1398         true, true, true, false, false, true, true, true, true, true,
1399         true, true, true, true, true, true, true, true, true, true,
1400         true, true, true, true, true, true, true, true, true, true,
1401         true, true, true, true, true, true, true, true, true, true,
1402         true, true, true, true, true, true, true, true, true, true,
1403         true, true, true, true, true, true, true, true, true, true,
1404         true, true, true, true, true, true, true, true, true, true,
1405         true, true, true, true, true, true, true, true, true, true,
1406         true, true, true, true, true, true};*/
1407 
1408 
1409     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1410 
1411     myConverter = ucnv_open("ksc", &err);
1412     if (U_FAILURE(err)) {
1413       log_data_err("Failed to create an ibm-ksc converter\n");
1414       return;
1415     }
1416     else
1417     {
1418         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419             log_err("ucnv_getType Failed for ibm-949\n");
1420         else
1421             log_verbose("ucnv_getType ibm-949 ok\n");
1422 
1423         if(myConverter!=NULL)
1424             ucnv_getStarters(myConverter, mystarters, &err);
1425 
1426         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427           log_err("Failed ucnv_getStarters for ksc\n");
1428           else
1429           log_verbose("ucnv_getStarters ok\n");*/
1430 
1431     }
1432     ucnv_close(myConverter);
1433 
1434     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435     TestConverterType("ibm-878", UCNV_SBCS);
1436 #endif
1437 
1438     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1439 
1440     TestConverterType("ibm-1208", UCNV_UTF8);
1441 
1442     TestConverterType("utf-8", UCNV_UTF8);
1443     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1447 
1448 #if !UCONFIG_NO_LEGACY_CONVERSION
1449 
1450 #if defined(U_ENABLE_GENERIC_ISO_2022)
1451     TestConverterType("iso-2022", UCNV_ISO_2022);
1452 #endif
1453 
1454     TestConverterType("hz", UCNV_HZ);
1455 #endif
1456 
1457     TestConverterType("scsu", UCNV_SCSU);
1458 
1459 #if !UCONFIG_NO_LEGACY_CONVERSION
1460     TestConverterType("x-iscii-de", UCNV_ISCII);
1461 #endif
1462 
1463     TestConverterType("ascii", UCNV_US_ASCII);
1464     TestConverterType("utf-7", UCNV_UTF7);
1465     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466     TestConverterType("bocu-1", UCNV_BOCU1);
1467 }
1468 
1469 static void
TestAmbiguousConverter(UConverter * cnv)1470 TestAmbiguousConverter(UConverter *cnv) {
1471     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1472     UChar outUnicode[20]={ 0, 0, 0, 0 };
1473 
1474     const char *s;
1475     UChar *u;
1476     UErrorCode errorCode;
1477     UBool isAmbiguous;
1478 
1479     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480     errorCode=U_ZERO_ERROR;
1481     s=inBytes;
1482     u=outUnicode;
1483     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, true, &errorCode);
1484     if(U_FAILURE(errorCode)) {
1485         /* we do not care about general failures in this test; the input may just not be mappable */
1486         return;
1487     }
1488 
1489     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491         /* There are some encodings that are partially ASCII based,
1492         like the ISO-7 and GSM series of codepages, which we ignore. */
1493         return;
1494     }
1495 
1496     isAmbiguous=ucnv_isAmbiguous(cnv);
1497 
1498     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1500         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1502         return;
1503     }
1504 
1505     if(outUnicode[2]!=0x5c) {
1506         /* needs fixup, fix it */
1507         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1508         if(outUnicode[2]!=0x5c) {
1509             /* the fix failed */
1510             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511             return;
1512         }
1513     }
1514 }
1515 
TestAmbiguous(void)1516 static void TestAmbiguous(void)
1517 {
1518     UErrorCode status = U_ZERO_ERROR;
1519     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1520     static const char target[] = {
1521         /* "\\usr\\local\\share\\data\\icutest.txt" */
1522         0x5c, 0x75, 0x73, 0x72,
1523         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525         0x5c, 0x64, 0x61, 0x74, 0x61,
1526         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527         0
1528     };
1529     UChar asciiResult[200], sjisResult[200];
1530     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1531     const char *name;
1532 
1533     /* enumerate all converters */
1534     status=U_ZERO_ERROR;
1535     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536         cnv=ucnv_open(name, &status);
1537         if(U_SUCCESS(status)) {
1538             /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1539             const char* cnvName = ucnv_getName(cnv, &status);
1540             if (strlen(cnvName) < 8 ||
1541                 strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1542             TestAmbiguousConverter(cnv);
1543             }
1544             /* END android-changed */
1545             ucnv_close(cnv);
1546         } else {
1547             log_err("error: unable to open available converter \"%s\"\n", name);
1548             status=U_ZERO_ERROR;
1549         }
1550     }
1551 
1552 #if !UCONFIG_NO_LEGACY_CONVERSION
1553     sjis_cnv = ucnv_open("ibm-943", &status);
1554     if (U_FAILURE(status))
1555     {
1556         log_data_err("Failed to create a SJIS converter\n");
1557         return;
1558     }
1559     ascii_cnv = ucnv_open("LATIN-1", &status);
1560     if (U_FAILURE(status))
1561     {
1562         log_data_err("Failed to create a LATIN-1 converter\n");
1563         ucnv_close(sjis_cnv);
1564         return;
1565     }
1566     /* convert target from SJIS to Unicode */
1567     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1568     if (U_FAILURE(status))
1569     {
1570         log_err("Failed to convert the SJIS string.\n");
1571         ucnv_close(sjis_cnv);
1572         ucnv_close(ascii_cnv);
1573         return;
1574     }
1575     /* convert target from Latin-1 to Unicode */
1576     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1577     if (U_FAILURE(status))
1578     {
1579         log_err("Failed to convert the Latin-1 string.\n");
1580         ucnv_close(sjis_cnv);
1581         ucnv_close(ascii_cnv);
1582         return;
1583     }
1584     if (!ucnv_isAmbiguous(sjis_cnv))
1585     {
1586         log_err("SJIS converter should contain ambiguous character mappings.\n");
1587         ucnv_close(sjis_cnv);
1588         ucnv_close(ascii_cnv);
1589         return;
1590     }
1591     if (u_strcmp(sjisResult, asciiResult) == 0)
1592     {
1593         log_err("File separators for SJIS don't need to be fixed.\n");
1594     }
1595     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1596     if (u_strcmp(sjisResult, asciiResult) != 0)
1597     {
1598         log_err("Fixing file separator for SJIS failed.\n");
1599     }
1600     ucnv_close(sjis_cnv);
1601     ucnv_close(ascii_cnv);
1602 #endif
1603 }
1604 
1605 static void
TestSignatureDetection(void)1606 TestSignatureDetection(void){
1607     /* with null terminated strings */
1608     {
1609         static const char* data[] = {
1610                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1611                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1612                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1613                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1614 
1615                 "\xFE\xFF",             /* UTF-16BE */
1616                 "\xFF\xFE",             /* UTF-16LE */
1617                 "\xEF\xBB\xBF",         /* UTF-8    */
1618                 "\x0E\xFE\xFF",         /* SCSU     */
1619 
1620                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1621                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1622                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1623                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1624 
1625                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1626                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1627                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1628                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1629                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1630 
1631                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1632         };
1633         static const char* expected[] = {
1634                 "UTF-16BE",
1635                 "UTF-16LE",
1636                 "UTF-8",
1637                 "SCSU",
1638 
1639                 "UTF-16BE",
1640                 "UTF-16LE",
1641                 "UTF-8",
1642                 "SCSU",
1643 
1644                 "UTF-16BE",
1645                 "UTF-16LE",
1646                 "UTF-8",
1647                 "SCSU",
1648 
1649                 "UTF-7",
1650                 "UTF-7",
1651                 "UTF-7",
1652                 "UTF-7",
1653                 "UTF-7",
1654                 "UTF-EBCDIC"
1655         };
1656         static const int32_t expectedLength[] ={
1657             2,
1658             2,
1659             3,
1660             3,
1661 
1662             2,
1663             2,
1664             3,
1665             3,
1666 
1667             2,
1668             2,
1669             3,
1670             3,
1671 
1672             5,
1673             4,
1674             4,
1675             4,
1676             4,
1677             4
1678         };
1679         int i=0;
1680         UErrorCode err;
1681         int32_t signatureLength = -1;
1682         const char* source = NULL;
1683         const char* enc = NULL;
1684         for( ; i<UPRV_LENGTHOF(data); i++){
1685             err = U_ZERO_ERROR;
1686             source = data[i];
1687             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1688             if(U_FAILURE(err)){
1689                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1690                 continue;
1691             }
1692             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1693                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1694                 continue;
1695             }
1696             if(signatureLength != expectedLength[i]){
1697                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1698             }
1699         }
1700     }
1701     {
1702         static const char* data[] = {
1703                 "\xFE\xFF\x00",         /* UTF-16BE */
1704                 "\xFF\xFE\x00",         /* UTF-16LE */
1705                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1706                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1707                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1708                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1709                 "\xFE\xFF",             /* UTF-16BE */
1710                 "\xFF\xFE",             /* UTF-16LE */
1711                 "\xEF\xBB\xBF",         /* UTF-8    */
1712                 "\x0E\xFE\xFF",         /* SCSU     */
1713                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1714                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1715                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1716                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1717                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1718                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1719                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1720                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1721                 "\xFB\xEE\x28",         /* BOCU-1   */
1722                 "\xFF\x41\x42"          /* NULL     */
1723         };
1724         static const int len[] = {
1725             3,
1726             3,
1727             4,
1728             4,
1729             4,
1730             4,
1731             2,
1732             2,
1733             3,
1734             3,
1735             4,
1736             4,
1737             4,
1738             4,
1739             4,
1740             4,
1741             5,
1742             5,
1743             3,
1744             3
1745         };
1746 
1747         static const char* expected[] = {
1748                 "UTF-16BE",
1749                 "UTF-16LE",
1750                 "UTF-8",
1751                 "SCSU",
1752                 "UTF-32BE",
1753                 "UTF-32LE",
1754                 "UTF-16BE",
1755                 "UTF-16LE",
1756                 "UTF-8",
1757                 "SCSU",
1758                 "UTF-32BE",
1759                 "UTF-32LE",
1760                 "UTF-16BE",
1761                 "UTF-16LE",
1762                 "UTF-8",
1763                 "SCSU",
1764                 "UTF-32BE",
1765                 "UTF-32LE",
1766                 "BOCU-1",
1767                 NULL
1768         };
1769         static const int32_t expectedLength[] ={
1770             2,
1771             2,
1772             3,
1773             3,
1774             4,
1775             4,
1776             2,
1777             2,
1778             3,
1779             3,
1780             4,
1781             4,
1782             2,
1783             2,
1784             3,
1785             3,
1786             4,
1787             4,
1788             3,
1789             0
1790         };
1791         int i=0;
1792         UErrorCode err;
1793         int32_t signatureLength = -1;
1794         int32_t sourceLength=-1;
1795         const char* source = NULL;
1796         const char* enc = NULL;
1797         for( ; i<UPRV_LENGTHOF(data); i++){
1798             err = U_ZERO_ERROR;
1799             source = data[i];
1800             sourceLength = len[i];
1801             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1802             if(U_FAILURE(err)){
1803                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1804                 continue;
1805             }
1806             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1807                 if(expected[i] !=NULL){
1808                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1809                  continue;
1810                 }
1811             }
1812             if(signatureLength != expectedLength[i]){
1813                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1814             }
1815         }
1816     }
1817 }
1818 
TestUTF7(void)1819 static void TestUTF7(void) {
1820     /* test input */
1821     static const uint8_t in[]={
1822         /* H - +Jjo- - ! +- +2AHcAQ */
1823         0x48,
1824         0x2d,
1825         0x2b, 0x4a, 0x6a, 0x6f,
1826         0x2d, 0x2d,
1827         0x21,
1828         0x2b, 0x2d,
1829         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1830     };
1831 
1832     /* expected test results */
1833     static const int32_t results[]={
1834         /* number of bytes read, code point */
1835         1, 0x48,
1836         1, 0x2d,
1837         4, 0x263a, /* <WHITE SMILING FACE> */
1838         2, 0x2d,
1839         1, 0x21,
1840         2, 0x2b,
1841         7, 0x10401
1842     };
1843 
1844     const char *cnvName;
1845     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1846     UErrorCode errorCode=U_ZERO_ERROR;
1847     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1848     if(U_FAILURE(errorCode)) {
1849         log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1850         return;
1851     }
1852     TestNextUChar(cnv, source, limit, results, "UTF-7");
1853     /* Test the condition when source >= sourceLimit */
1854     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1855     cnvName = ucnv_getName(cnv, &errorCode);
1856     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1857         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1858     }
1859     ucnv_close(cnv);
1860 }
1861 
TestIMAP(void)1862 static void TestIMAP(void) {
1863     /* test input */
1864     static const uint8_t in[]={
1865         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1866         0x48,
1867         0x2d,
1868         0x26, 0x4a, 0x6a, 0x6f,
1869         0x2d, 0x2d,
1870         0x21,
1871         0x26, 0x2d,
1872         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1873     };
1874 
1875     /* expected test results */
1876     static const int32_t results[]={
1877         /* number of bytes read, code point */
1878         1, 0x48,
1879         1, 0x2d,
1880         4, 0x263a, /* <WHITE SMILING FACE> */
1881         2, 0x2d,
1882         1, 0x21,
1883         2, 0x26,
1884         7, 0x10401
1885     };
1886 
1887     const char *cnvName;
1888     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1889     UErrorCode errorCode=U_ZERO_ERROR;
1890     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1891     if(U_FAILURE(errorCode)) {
1892         log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1893         return;
1894     }
1895     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1896     /* Test the condition when source >= sourceLimit */
1897     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1898     cnvName = ucnv_getName(cnv, &errorCode);
1899     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1900         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1901     }
1902     ucnv_close(cnv);
1903 }
1904 
TestUTF8(void)1905 static void TestUTF8(void) {
1906     /* test input */
1907     static const uint8_t in[]={
1908         0x61,
1909         0xc2, 0x80,
1910         0xe0, 0xa0, 0x80,
1911         0xf0, 0x90, 0x80, 0x80,
1912         0xf4, 0x84, 0x8c, 0xa1,
1913         0xf0, 0x90, 0x90, 0x81
1914     };
1915 
1916     /* expected test results */
1917     static const int32_t results[]={
1918         /* number of bytes read, code point */
1919         1, 0x61,
1920         2, 0x80,
1921         3, 0x800,
1922         4, 0x10000,
1923         4, 0x104321,
1924         4, 0x10401
1925     };
1926 
1927     /* error test input */
1928     static const uint8_t in2[]={
1929         0x61,
1930         0xc0, 0x80,                     /* illegal non-shortest form */
1931         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1932         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1933         0xc0, 0xc0,                     /* illegal trail byte */
1934         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1935         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1936         0xfe,                           /* illegal byte altogether */
1937         0x62
1938     };
1939 
1940     /* expected error test results */
1941     static const int32_t results2[]={
1942         /* number of bytes read, code point */
1943         1, 0x61,
1944         22, 0x62
1945     };
1946 
1947     UConverterToUCallback cb;
1948     const void *p;
1949 
1950     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1951     UErrorCode errorCode=U_ZERO_ERROR;
1952     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1953     if(U_FAILURE(errorCode)) {
1954         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1955         return;
1956     }
1957     TestNextUChar(cnv, source, limit, results, "UTF-8");
1958     /* Test the condition when source >= sourceLimit */
1959     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1960 
1961     /* test error behavior with a skip callback */
1962     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1963     source=(const char *)in2;
1964     limit=(const char *)(in2+sizeof(in2));
1965     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1966 
1967     ucnv_close(cnv);
1968 }
1969 
TestCESU8(void)1970 static void TestCESU8(void) {
1971     /* test input */
1972     static const uint8_t in[]={
1973         0x61,
1974         0xc2, 0x80,
1975         0xe0, 0xa0, 0x80,
1976         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1977         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1978         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1979         0xef, 0xbf, 0xbc
1980     };
1981 
1982     /* expected test results */
1983     static const int32_t results[]={
1984         /* number of bytes read, code point */
1985         1, 0x61,
1986         2, 0x80,
1987         3, 0x800,
1988         6, 0x10000,
1989         3, 0xdc01,
1990         -1,0xd802,  /* may read 3 or 6 bytes */
1991         -1,0x10ffff,/* may read 0 or 3 bytes */
1992         3, 0xfffc
1993     };
1994 
1995     /* error test input */
1996     static const uint8_t in2[]={
1997         0x61,
1998         0xc0, 0x80,                     /* illegal non-shortest form */
1999         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
2000         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2001         0xc0, 0xc0,                     /* illegal trail byte */
2002         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2003         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2004         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2005         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2006         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2007         0xfe,                           /* illegal byte altogether */
2008         0x62
2009     };
2010 
2011     /* expected error test results */
2012     static const int32_t results2[]={
2013         /* number of bytes read, code point */
2014         1, 0x61,
2015         34, 0x62
2016     };
2017 
2018     UConverterToUCallback cb;
2019     const void *p;
2020 
2021     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2022     UErrorCode errorCode=U_ZERO_ERROR;
2023     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2024     if(U_FAILURE(errorCode)) {
2025         log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2026         return;
2027     }
2028     TestNextUChar(cnv, source, limit, results, "CESU-8");
2029     /* Test the condition when source >= sourceLimit */
2030     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2031 
2032     /* test error behavior with a skip callback */
2033     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2034     source=(const char *)in2;
2035     limit=(const char *)(in2+sizeof(in2));
2036     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2037 
2038     ucnv_close(cnv);
2039 }
2040 
TestUTF16(void)2041 static void TestUTF16(void) {
2042     /* test input */
2043     static const uint8_t in1[]={
2044         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2045     };
2046     static const uint8_t in2[]={
2047         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2048     };
2049     static const uint8_t in3[]={
2050         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2051     };
2052 
2053     /* expected test results */
2054     static const int32_t results1[]={
2055         /* number of bytes read, code point */
2056         4, 0x4e00,
2057         2, 0xfeff
2058     };
2059     static const int32_t results2[]={
2060         /* number of bytes read, code point */
2061         4, 0x004e,
2062         2, 0xfffe
2063     };
2064     static const int32_t results3[]={
2065         /* number of bytes read, code point */
2066         2, 0xfefe,
2067         2, 0x4e00,
2068         2, 0xfeff,
2069         4, 0x20001
2070     };
2071 
2072     const char *source, *limit;
2073 
2074     UErrorCode errorCode=U_ZERO_ERROR;
2075     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2076     if(U_FAILURE(errorCode)) {
2077         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2078         return;
2079     }
2080 
2081     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2082     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2083 
2084     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2085     ucnv_resetToUnicode(cnv);
2086     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2087 
2088     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2089     ucnv_resetToUnicode(cnv);
2090     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2091 
2092     /* Test the condition when source >= sourceLimit */
2093     ucnv_resetToUnicode(cnv);
2094     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2095 
2096     ucnv_close(cnv);
2097 }
2098 
TestUTF16BE(void)2099 static void TestUTF16BE(void) {
2100     /* test input */
2101     static const uint8_t in[]={
2102         0x00, 0x61,
2103         0x00, 0xc0,
2104         0x00, 0x31,
2105         0x00, 0xf4,
2106         0xce, 0xfe,
2107         0xd8, 0x01, 0xdc, 0x01
2108     };
2109 
2110     /* expected test results */
2111     static const int32_t results[]={
2112         /* number of bytes read, code point */
2113         2, 0x61,
2114         2, 0xc0,
2115         2, 0x31,
2116         2, 0xf4,
2117         2, 0xcefe,
2118         4, 0x10401
2119     };
2120 
2121     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2122     UErrorCode errorCode=U_ZERO_ERROR;
2123     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2124     if(U_FAILURE(errorCode)) {
2125         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2126         return;
2127     }
2128     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2129     /* Test the condition when source >= sourceLimit */
2130     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2131     /*Test for the condition where there is an invalid character*/
2132     {
2133         static const uint8_t source2[]={0x61};
2134         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2135         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2136     }
2137 #if 0
2138     /*
2139      * Test disabled because currently the UTF-16BE/LE converters are supposed
2140      * to not set errors for unpaired surrogates.
2141      * This may change with
2142      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2143      */
2144 
2145     /*Test for the condition where there is a surrogate pair*/
2146     {
2147         const uint8_t source2[]={0xd8, 0x01};
2148         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2149     }
2150 #endif
2151     ucnv_close(cnv);
2152 }
2153 
2154 static void
TestUTF16LE(void)2155 TestUTF16LE(void) {
2156     /* test input */
2157     static const uint8_t in[]={
2158         0x61, 0x00,
2159         0x31, 0x00,
2160         0x4e, 0x2e,
2161         0x4e, 0x00,
2162         0x01, 0xd8, 0x01, 0xdc
2163     };
2164 
2165     /* expected test results */
2166     static const int32_t results[]={
2167         /* number of bytes read, code point */
2168         2, 0x61,
2169         2, 0x31,
2170         2, 0x2e4e,
2171         2, 0x4e,
2172         4, 0x10401
2173     };
2174 
2175     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2176     UErrorCode errorCode=U_ZERO_ERROR;
2177     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2178     if(U_FAILURE(errorCode)) {
2179         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2180         return;
2181     }
2182     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2183     /* Test the condition when source >= sourceLimit */
2184     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2185     /*Test for the condition where there is an invalid character*/
2186     {
2187         static const uint8_t source2[]={0x61};
2188         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2189         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2190     }
2191 #if 0
2192     /*
2193      * Test disabled because currently the UTF-16BE/LE converters are supposed
2194      * to not set errors for unpaired surrogates.
2195      * This may change with
2196      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2197      */
2198 
2199     /*Test for the condition where there is a surrogate character*/
2200     {
2201         static const uint8_t source2[]={0x01, 0xd8};
2202         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2203     }
2204 #endif
2205 
2206     ucnv_close(cnv);
2207 }
2208 
TestUTF32(void)2209 static void TestUTF32(void) {
2210     /* test input */
2211     static const uint8_t in1[]={
2212         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2213     };
2214     static const uint8_t in2[]={
2215         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2216     };
2217     static const uint8_t in3[]={
2218         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2219     };
2220 
2221     /* expected test results */
2222     static const int32_t results1[]={
2223         /* number of bytes read, code point */
2224         8, 0x100f00,
2225         4, 0xfeff
2226     };
2227     static const int32_t results2[]={
2228         /* number of bytes read, code point */
2229         8, 0x0f1000,
2230         4, 0xfffe
2231     };
2232     static const int32_t results3[]={
2233         /* number of bytes read, code point */
2234         4, 0xfefe,
2235         4, 0x100f00,
2236         4, 0xfffd, /* unmatched surrogate */
2237         4, 0xfffd  /* unmatched surrogate */
2238     };
2239 
2240     const char *source, *limit;
2241 
2242     UErrorCode errorCode=U_ZERO_ERROR;
2243     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2244     if(U_FAILURE(errorCode)) {
2245         log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2246         return;
2247     }
2248 
2249     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2250     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2251 
2252     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2253     ucnv_resetToUnicode(cnv);
2254     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2255 
2256     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2257     ucnv_resetToUnicode(cnv);
2258     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2259 
2260     /* Test the condition when source >= sourceLimit */
2261     ucnv_resetToUnicode(cnv);
2262     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2263 
2264     ucnv_close(cnv);
2265 }
2266 
2267 static void
TestUTF32BE(void)2268 TestUTF32BE(void) {
2269     /* test input */
2270     static const uint8_t in[]={
2271         0x00, 0x00, 0x00, 0x61,
2272         0x00, 0x00, 0x30, 0x61,
2273         0x00, 0x00, 0xdc, 0x00,
2274         0x00, 0x00, 0xd8, 0x00,
2275         0x00, 0x00, 0xdf, 0xff,
2276         0x00, 0x00, 0xff, 0xfe,
2277         0x00, 0x10, 0xab, 0xcd,
2278         0x00, 0x10, 0xff, 0xff
2279     };
2280 
2281     /* expected test results */
2282     static const int32_t results[]={
2283         /* number of bytes read, code point */
2284         4, 0x61,
2285         4, 0x3061,
2286         4, 0xfffd,
2287         4, 0xfffd,
2288         4, 0xfffd,
2289         4, 0xfffe,
2290         4, 0x10abcd,
2291         4, 0x10ffff
2292     };
2293 
2294     /* error test input */
2295     static const uint8_t in2[]={
2296         0x00, 0x00, 0x00, 0x61,
2297         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2298         0x00, 0x00, 0x00, 0x62,
2299         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2300         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2301         0x00, 0x00, 0x01, 0x62,
2302         0x00, 0x00, 0x02, 0x62
2303     };
2304 
2305     /* expected error test results */
2306     static const int32_t results2[]={
2307         /* number of bytes read, code point */
2308         4,  0x61,
2309         8,  0x62,
2310         12, 0x162,
2311         4,  0x262
2312     };
2313 
2314     UConverterToUCallback cb;
2315     const void *p;
2316 
2317     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2318     UErrorCode errorCode=U_ZERO_ERROR;
2319     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2320     if(U_FAILURE(errorCode)) {
2321         log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2322         return;
2323     }
2324     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2325 
2326     /* Test the condition when source >= sourceLimit */
2327     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2328 
2329     /* test error behavior with a skip callback */
2330     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2331     source=(const char *)in2;
2332     limit=(const char *)(in2+sizeof(in2));
2333     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2334 
2335     ucnv_close(cnv);
2336 }
2337 
2338 static void
TestUTF32LE(void)2339 TestUTF32LE(void) {
2340     /* test input */
2341     static const uint8_t in[]={
2342         0x61, 0x00, 0x00, 0x00,
2343         0x61, 0x30, 0x00, 0x00,
2344         0x00, 0xdc, 0x00, 0x00,
2345         0x00, 0xd8, 0x00, 0x00,
2346         0xff, 0xdf, 0x00, 0x00,
2347         0xfe, 0xff, 0x00, 0x00,
2348         0xcd, 0xab, 0x10, 0x00,
2349         0xff, 0xff, 0x10, 0x00
2350     };
2351 
2352     /* expected test results */
2353     static const int32_t results[]={
2354         /* number of bytes read, code point */
2355         4, 0x61,
2356         4, 0x3061,
2357         4, 0xfffd,
2358         4, 0xfffd,
2359         4, 0xfffd,
2360         4, 0xfffe,
2361         4, 0x10abcd,
2362         4, 0x10ffff
2363     };
2364 
2365     /* error test input */
2366     static const uint8_t in2[]={
2367         0x61, 0x00, 0x00, 0x00,
2368         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2369         0x62, 0x00, 0x00, 0x00,
2370         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2371         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2372         0x62, 0x01, 0x00, 0x00,
2373         0x62, 0x02, 0x00, 0x00,
2374     };
2375 
2376     /* expected error test results */
2377     static const int32_t results2[]={
2378         /* number of bytes read, code point */
2379         4,  0x61,
2380         8,  0x62,
2381         12, 0x162,
2382         4,  0x262,
2383     };
2384 
2385     UConverterToUCallback cb;
2386     const void *p;
2387 
2388     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2389     UErrorCode errorCode=U_ZERO_ERROR;
2390     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2391     if(U_FAILURE(errorCode)) {
2392         log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2393         return;
2394     }
2395     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2396 
2397     /* Test the condition when source >= sourceLimit */
2398     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2399 
2400     /* test error behavior with a skip callback */
2401     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2402     source=(const char *)in2;
2403     limit=(const char *)(in2+sizeof(in2));
2404     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2405 
2406     ucnv_close(cnv);
2407 }
2408 
2409 static void
TestLATIN1(void)2410 TestLATIN1(void) {
2411     /* test input */
2412     static const uint8_t in[]={
2413        0x61,
2414        0x31,
2415        0x32,
2416        0xc0,
2417        0xf0,
2418        0xf4,
2419     };
2420 
2421     /* expected test results */
2422     static const int32_t results[]={
2423         /* number of bytes read, code point */
2424         1, 0x61,
2425         1, 0x31,
2426         1, 0x32,
2427         1, 0xc0,
2428         1, 0xf0,
2429         1, 0xf4,
2430     };
2431     static const uint16_t in1[] = {
2432         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2433         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2434         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2435         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2436         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2437         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2438         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2439         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2440         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2441         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2442         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2443         0xcb, 0x82
2444     };
2445     static const uint8_t out1[] = {
2446         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2447         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2448         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2449         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2450         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2451         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2452         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2453         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2454         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2455         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2456         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2457         0xcb, 0x82
2458     };
2459     static const uint16_t in2[]={
2460         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2461         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2462         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2463         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2464         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2465         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2466         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2467         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2468         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2469         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2470         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2471         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2472         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2473         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2474         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2475         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2476         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2477         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2478         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2479         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2480         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2481         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2482         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2483         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2484         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2485         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2486         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2487         0x37, 0x20, 0x2A, 0x2F,
2488     };
2489     static const unsigned char out2[]={
2490         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2491         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2492         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2493         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2494         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2495         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2496         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2497         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2498         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2499         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2500         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2501         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2502         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2503         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2504         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2505         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2506         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2507         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2508         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2509         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2510         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2511         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2512         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2513         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2514         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2515         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2516         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2517         0x37, 0x20, 0x2A, 0x2F,
2518     };
2519     const char *source=(const char *)in;
2520     const char *limit=(const char *)in+sizeof(in);
2521 
2522     UErrorCode errorCode=U_ZERO_ERROR;
2523     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2524     if(U_FAILURE(errorCode)) {
2525         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2526         return;
2527     }
2528     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2529     /* Test the condition when source >= sourceLimit */
2530     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2531     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2532     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2533 
2534     ucnv_close(cnv);
2535 }
2536 
2537 static void
TestSBCS(void)2538 TestSBCS(void) {
2539     /* test input */
2540     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2541     /* expected test results */
2542     static const int32_t results[]={
2543         /* number of bytes read, code point */
2544         1, 0x61,
2545         1, 0xbf,
2546         1, 0xc4,
2547         1, 0x2021,
2548         1, 0xf8ff,
2549         1, 0x00d9
2550     };
2551 
2552     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2553     UErrorCode errorCode=U_ZERO_ERROR;
2554     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2555     if(U_FAILURE(errorCode)) {
2556         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2557         return;
2558     }
2559     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2560     /* Test the condition when source >= sourceLimit */
2561     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2562     /*Test for Illegal character */ /*
2563     {
2564     static const uint8_t input1[]={ 0xA1 };
2565     const char* illegalsource=(const char*)input1;
2566     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal character");
2567     }
2568    */
2569     ucnv_close(cnv);
2570 }
2571 
2572 static void
TestDBCS(void)2573 TestDBCS(void) {
2574     /* test input */
2575     static const uint8_t in[]={
2576         0x44, 0x6a,
2577         0xc4, 0x9c,
2578         0x7a, 0x74,
2579         0x46, 0xab,
2580         0x42, 0x5b,
2581 
2582     };
2583 
2584     /* expected test results */
2585     static const int32_t results[]={
2586         /* number of bytes read, code point */
2587         2, 0x00a7,
2588         2, 0xe1d2,
2589         2, 0x6962,
2590         2, 0xf842,
2591         2, 0xffe5,
2592     };
2593 
2594     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2595     UErrorCode errorCode=U_ZERO_ERROR;
2596 
2597     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2598     if(U_FAILURE(errorCode)) {
2599         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2600         return;
2601     }
2602     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2603     /* Test the condition when source >= sourceLimit */
2604     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2605     /*Test for the condition where there is an invalid character*/
2606     {
2607         static const uint8_t source2[]={0x1a, 0x1b};
2608         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2609     }
2610     /*Test for the condition where we have a truncated char*/
2611     {
2612         static const uint8_t source1[]={0xc4};
2613         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2614         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2615     }
2616     ucnv_close(cnv);
2617 }
2618 
2619 static void
TestMBCS(void)2620 TestMBCS(void) {
2621     /* test input */
2622     static const uint8_t in[]={
2623         0x01,
2624         0xa6, 0xa3,
2625         0x00,
2626         0xa6, 0xa1,
2627         0x08,
2628         0xc2, 0x76,
2629         0xc2, 0x78,
2630 
2631     };
2632 
2633     /* expected test results */
2634     static const int32_t results[]={
2635         /* number of bytes read, code point */
2636         1, 0x0001,
2637         2, 0x250c,
2638         1, 0x0000,
2639         2, 0x2500,
2640         1, 0x0008,
2641         2, 0xd60c,
2642         2, 0xd60e,
2643     };
2644 
2645     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2646     UErrorCode errorCode=U_ZERO_ERROR;
2647 
2648     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2649     if(U_FAILURE(errorCode)) {
2650         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2651         return;
2652     }
2653     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2654     /* Test the condition when source >= sourceLimit */
2655     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2656     /*Test for the condition where there is an invalid character*/
2657     {
2658         static const uint8_t source2[]={0xa1, 0x80};
2659         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2660     }
2661     /*Test for the condition where we have a truncated char*/
2662     {
2663         static const uint8_t source1[]={0xc4};
2664         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2665         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2666     }
2667     ucnv_close(cnv);
2668 
2669 }
2670 
2671 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2672 static void
TestICCRunout(void)2673 TestICCRunout(void) {
2674 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2675 
2676     const char *cnvName = "ibm-1363";
2677     UErrorCode status = U_ZERO_ERROR;
2678     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2679     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2680     const char *source = sourceData;
2681     const char *sourceLim = sourceData+sizeof(sourceData);
2682     UChar c1, c2, c3;
2683     UConverter *cnv=ucnv_open(cnvName, &status);
2684     if(U_FAILURE(status)) {
2685         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2686 	return;
2687     }
2688 
2689 #if 0
2690     {
2691     UChar   targetBuf[256];
2692     UChar   *target = targetBuf;
2693     UChar   *targetLim = target+256;
2694     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, true, &status);
2695 
2696     log_info("After convert: target@%d, source@%d, status%s\n",
2697 	     target-targetBuf, source-sourceData, u_errorName(status));
2698 
2699     if(U_FAILURE(status)) {
2700 	log_err("Failed to convert: %s\n", u_errorName(status));
2701     } else {
2702 
2703     }
2704     }
2705 #endif
2706 
2707     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2708     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2709 
2710     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2711     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2712 
2713     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2714     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2715 
2716     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2717 	log_verbose("OK\n");
2718     } else {
2719 	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2720     }
2721 
2722     ucnv_close(cnv);
2723 
2724 }
2725 #endif
2726 
2727 #ifdef U_ENABLE_GENERIC_ISO_2022
2728 
2729 static void
TestISO_2022()2730 TestISO_2022() {
2731     /* test input */
2732     static const uint8_t in[]={
2733         0x1b, 0x25, 0x42,
2734         0x31,
2735         0x32,
2736         0x61,
2737         0xc2, 0x80,
2738         0xe0, 0xa0, 0x80,
2739         0xf0, 0x90, 0x80, 0x80
2740     };
2741 
2742 
2743 
2744     /* expected test results */
2745     static const int32_t results[]={
2746         /* number of bytes read, code point */
2747         4, 0x0031,  /* 4 bytes including the escape sequence */
2748         1, 0x0032,
2749         1, 0x61,
2750         2, 0x80,
2751         3, 0x800,
2752         4, 0x10000
2753     };
2754 
2755     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2756     UErrorCode errorCode=U_ZERO_ERROR;
2757     UConverter *cnv;
2758 
2759     cnv=ucnv_open("ISO_2022", &errorCode);
2760     if(U_FAILURE(errorCode)) {
2761         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2762         return;
2763     }
2764     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2765 
2766     /* Test the condition when source >= sourceLimit */
2767     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2768     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2769     /*Test for the condition where we have a truncated char*/
2770     {
2771         static const uint8_t source1[]={0xc4};
2772         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2773         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2774     }
2775     /*Test for the condition where there is an invalid character*/
2776     {
2777         static const uint8_t source2[]={0xa1, 0x01};
2778         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2779     }
2780     ucnv_close(cnv);
2781 }
2782 
2783 #endif
2784 
2785 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2786 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2787     const UChar* uSource;
2788     const UChar* uSourceLimit;
2789     const char* cSource;
2790     const char* cSourceLimit;
2791     UChar *uTargetLimit =NULL;
2792     UChar *uTarget;
2793     char *cTarget;
2794     const char *cTargetLimit;
2795     char *cBuf;
2796     UChar *uBuf; /*,*test;*/
2797     int32_t uBufSize = 120;
2798     int len=0;
2799     int i=2;
2800     UErrorCode errorCode=U_ZERO_ERROR;
2801     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2802     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2803     ucnv_reset(cnv);
2804     for(;--i>0; ){
2805         uSource = (UChar*) source;
2806         uSourceLimit=(const UChar*)sourceLimit;
2807         cTarget = cBuf;
2808         uTarget = uBuf;
2809         cSource = cBuf;
2810         cTargetLimit = cBuf;
2811         uTargetLimit = uBuf;
2812 
2813         do{
2814 
2815             cTargetLimit = cTargetLimit+ i;
2816             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
2817             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2818                errorCode=U_ZERO_ERROR;
2819                 continue;
2820             }
2821 
2822             if(U_FAILURE(errorCode)){
2823                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2824                 return;
2825             }
2826 
2827         }while (uSource<uSourceLimit);
2828 
2829         cSourceLimit =cTarget;
2830         do{
2831             uTargetLimit=uTargetLimit+i;
2832             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
2833             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2834                errorCode=U_ZERO_ERROR;
2835                 continue;
2836             }
2837             if(U_FAILURE(errorCode)){
2838                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2839                     return;
2840             }
2841         }while(cSource<cSourceLimit);
2842 
2843         uSource = source;
2844         /*test =uBuf;*/
2845         for(len=0;len<(int)(source - sourceLimit);len++){
2846             if(uBuf[len]!=uSource[len]){
2847                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2848             }
2849         }
2850     }
2851     free(uBuf);
2852     free(cBuf);
2853 }
2854 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2855 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2856     const UChar* uSource;
2857     const UChar* uSourceLimit;
2858     const char* cSource;
2859     UChar *uTargetLimit =NULL;
2860     UChar *uTarget;
2861     char *cTarget;
2862     const char *cTargetLimit;
2863     char *cBuf;
2864     UChar *uBuf,*test;
2865     int32_t uBufSize = 120;
2866     int numCharsInTarget=0;
2867     UErrorCode errorCode=U_ZERO_ERROR;
2868     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2869     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2870     uSource = source;
2871     uSourceLimit=sourceLimit;
2872     cTarget = cBuf;
2873     cTargetLimit = cBuf +uBufSize*5;
2874     uTarget = uBuf;
2875     uTargetLimit = uBuf+ uBufSize*5;
2876     ucnv_reset(cnv);
2877     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2878     if(U_FAILURE(errorCode)){
2879         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2880         return;
2881     }
2882     cSource = cBuf;
2883     test =uBuf;
2884     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2885     if(U_FAILURE(errorCode)){
2886         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2887         return;
2888     }
2889     uSource = source;
2890     while(uSource<uSourceLimit){
2891         if(*test!=*uSource){
2892 
2893             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2894         }
2895         uSource++;
2896         test++;
2897     }
2898     free(uBuf);
2899     free(cBuf);
2900 }
2901 
/*
 * Round-trips [source, sourceLimit) through cnv while feeding the converter
 * one more input unit per call: the source limit handed to
 * ucnv_fromUnicode()/ucnv_toUnicode() is advanced by one on every pass until
 * it reaches the real end.  The target buffers are full-sized.  The UChars
 * that come back are then compared with the original input.
 *
 * @param source       first UTF-16 unit of the input
 * @param sourceLimit  one past the last UTF-16 unit of the input
 * @param cnv          an open converter; it is reset here, not closed
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int32_t len;
    int32_t compareLen;
    int32_t producedLen;
    int i=2;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallSourceBuffer: out of memory\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    for(;--i>0;){
        uSource = (const UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cBuf+uBufSize*10;
        uSourceLimit=uSource;

        /* Unicode -> bytes, exposing one more UChar of input per call */
        do{
            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
            }else if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while (uSource<sourceLimit);

        /* bytes -> Unicode, exposing one more byte of input per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
            }else if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        /*
         * Compare the round-tripped text with the input.  The bound used to be
         * (source - sourceLimit), which is negative, so nothing was checked.
         * Only UChars that were actually written are inspected: both
         * conversions run without flush, so the tail may still be pending
         * inside the converter.
         */
        uSource = (const UChar*) source;
        compareLen = (int32_t)(sourceLimit - uSource);
        producedLen = (int32_t)(uTarget - uBuf);
        if(producedLen < compareLen){
            compareLen = producedLen;
        }
        for(len=0;len<compareLen;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    /* reached on success and on every failure so that neither buffer leaks */
    free(uBuf);
    free(cBuf);
}
2976 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2977 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2978                      const uint16_t results[], const char* message){
2979 /*     const char* s0; */
2980      const char* s=(char*)source;
2981      const uint16_t *r=results;
2982      UErrorCode errorCode=U_ZERO_ERROR;
2983      uint32_t c,exC;
2984      ucnv_reset(cnv);
2985      while(s<limit) {
2986 	 /* s0=s; */
2987         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2988         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2989             break; /* no more significant input */
2990         } else if(U_FAILURE(errorCode)) {
2991             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2992             break;
2993         } else {
2994             if(U16_IS_LEAD(*r)){
2995                 int i =0, len = 2;
2996                 U16_NEXT(r, i, len, exC);
2997                 r++;
2998             }else{
2999                 exC = *r;
3000             }
3001             if(c!=(uint32_t)(exC))
3002                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3003         }
3004         r++;
3005     }
3006 }
3007 
TestJitterbug930(const char * enc)3008 static int TestJitterbug930(const char* enc){
3009     UErrorCode err = U_ZERO_ERROR;
3010     UConverter*converter;
3011     char out[80];
3012     char*target = out;
3013     UChar in[4];
3014     const UChar*source = in;
3015     int32_t off[80];
3016     int32_t* offsets = off;
3017     int numOffWritten=0;
3018     UBool flush = 0;
3019     converter = my_ucnv_open(enc, &err);
3020 
3021     in[0] = 0x41;     /* 0x4E00;*/
3022     in[1] = 0x4E01;
3023     in[2] = 0x4E02;
3024     in[3] = 0x4E03;
3025 
3026     memset(off, '*', sizeof(off));
3027 
3028     ucnv_fromUnicode (converter,
3029             &target,
3030             target+2,
3031             &source,
3032             source+3,
3033             offsets,
3034             flush,
3035             &err);
3036 
3037         /* writes three bytes into the output buffer: 41 1B 24
3038         * but offsets contains 0 1 1
3039     */
3040     while(*offsets< off[10]){
3041         numOffWritten++;
3042         offsets++;
3043     }
3044     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3045     if(numOffWritten!= (int)(target-out)){
3046         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3047     }
3048 
3049     err = U_ZERO_ERROR;
3050 
3051     memset(off,'*' , sizeof(off));
3052 
3053     flush = 1;
3054     offsets=off;
3055     ucnv_fromUnicode (converter,
3056             &target,
3057             target+4,
3058             &source,
3059             source,
3060             offsets,
3061             flush,
3062             &err);
3063     numOffWritten=0;
3064     while(*offsets< off[10]){
3065         numOffWritten++;
3066         if(*offsets!= -1){
3067             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3068         }
3069         offsets++;
3070     }
3071 
3072     /* writes 42 43 7A into output buffer,
3073      * offsets contains -1 -1 -1
3074      */
3075     ucnv_close(converter);
3076     return 0;
3077 }
3078 
3079 static void
TestHZ(void)3080 TestHZ(void) {
3081     /* test input */
3082     static const uint16_t in[]={
3083             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3084             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3085             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3086             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3087             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3088             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3089             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3090             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3091             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3092             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3093             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3094             0x005A, 0x005B, 0x005C, 0x000A
3095       };
3096     const UChar* uSource;
3097     const UChar* uSourceLimit;
3098     const char* cSource;
3099     const char* cSourceLimit;
3100     UChar *uTargetLimit =NULL;
3101     UChar *uTarget;
3102     char *cTarget;
3103     const char *cTargetLimit;
3104     char *cBuf = NULL;
3105     UChar *uBuf = NULL;
3106     UChar *test;
3107     int32_t uBufSize = 120;
3108     UErrorCode errorCode=U_ZERO_ERROR;
3109     UConverter *cnv = NULL;
3110     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3111     int32_t* myOff= offsets;
3112     cnv=ucnv_open("HZ", &errorCode);
3113     if(U_FAILURE(errorCode)) {
3114         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3115         goto cleanup;
3116     }
3117 
3118     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3119     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3120     uSource = (const UChar*)in;
3121     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3122     cTarget = cBuf;
3123     cTargetLimit = cBuf +uBufSize*5;
3124     uTarget = uBuf;
3125     uTargetLimit = uBuf+ uBufSize*5;
3126     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3127     if(U_FAILURE(errorCode)){
3128         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3129         goto cleanup;
3130     }
3131     cSource = cBuf;
3132     cSourceLimit =cTarget;
3133     test =uBuf;
3134     myOff=offsets;
3135     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3136     if(U_FAILURE(errorCode)){
3137         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3138         goto cleanup;
3139     }
3140     uSource = (const UChar*)in;
3141     while(uSource<uSourceLimit){
3142         if(*test!=*uSource){
3143 
3144             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3145         }
3146         uSource++;
3147         test++;
3148     }
3149     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3150     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3152     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3153     TestJitterbug930("csISO2022JP");
3154 
3155 cleanup:
3156     ucnv_close(cnv);
3157     free(offsets);
3158     free(uBuf);
3159     free(cBuf);
3160 }
3161 
/*
 * ISCII conversion test.
 *
 * unicodeText and isciiBytes are two views of the same fixture.
 * testConvertToU() decodes the byte table with the "x-iscii-de" converter and
 * checks the result against the UTF-16 table; TestConv() then runs its
 * round-trip checks for "ISCII,version=0" with the byte table as the
 * reference encoding.
 */
static void
TestISCII(void){
    /* UTF-16 side of the fixture */
    static const uint16_t unicodeText[]={
        /* full range of Devanagari */
        0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, 0x0909, 0x090A,
        0x090B, 0x090E, 0x090F, 0x0910, 0x090D, 0x0912, 0x0913, 0x0914, 0x0911,
        0x0915, 0x0916, 0x0917, 0x0918, 0x0919, 0x091A, 0x091B, 0x091C, 0x091D,
        0x091E, 0x091F, 0x0920, 0x0921, 0x0922, 0x0923, 0x0924, 0x0925, 0x0926,
        0x0927, 0x0928, 0x0929, 0x092A, 0x092B, 0x092C, 0x092D, 0x092E, 0x092F,
        0x095F, 0x0930, 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937,
        0x0938, 0x0939, 0x200D, 0x093E, 0x093F, 0x0940, 0x0941, 0x0942, 0x0943,
        0x0946, 0x0947, 0x0948, 0x0945, 0x094A, 0x094B, 0x094C, 0x0949, 0x094D,
        0x093D, 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C,
        0x096D, 0x096E, 0x096F,
        /* soft halant */
        0x0915, 0x094D, 0x200D,
        /* explicit halant */
        0x0915, 0x094D, 0x200C,
        /* double danda */
        0x0965,
        /* ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x0061, 0x0915, 0x000D, 0x000A, 0x0996, 0x0043,
        0x0930, 0x094D, 0x200D,
        0x0901, 0x000D, 0x000A, 0x0905, 0x0985, 0x0043,
        0x0915, 0x0921, 0x002B, 0x095F,
        /* Tamil range */
        0x0B86, 0x0B87, 0x0B88,
        /* Telugu range */
        0x0C05, 0x0C02, 0x0C03, 0x0C31,
        /* Kannada range */
        0x0C85, 0x0C82, 0x0C83,
        /* abbreviation sign and anudatta */
        0x0970, 0x0952,
        /* code points written as <byte> + nukta (0xE9) in the byte table */
        0x0960,
        0x0944,
        0x090C,
        0x0962,
        0x0961,
        0x0963,
        0x0950,
        0x093D,
        0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
    };
    /* ISCII side of the fixture */
    static const unsigned char isciiBytes[]={
        0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9,
        0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2,
        0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB,
        0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4,
        0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
        0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
        0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
        0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8,
        0xEA, 0xE9, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
        0xF8, 0xF9, 0xFA,
        /* soft halant */
        0xB3, 0xE8, 0xE9,
        /* explicit halant */
        0xB3, 0xE8, 0xE8,
        /* double danda */
        0xEA, 0xEA,
        /* ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus (0xEF introduces an ATR code) */
        0x61, 0xEF, 0x42, 0xEF, 0x30, 0xB3, 0x0D, 0x0A, 0xEF, 0x43, 0xB4, 0x43,
        0xEF, 0x42, 0xCF, 0xE8, 0xD9,
        0xEF, 0x42, 0xA1, 0x0D, 0x0A, 0xEF, 0x42, 0xA4, 0xEF, 0x43, 0xA4, 0x43,
        0xEF, 0x42, 0xB3, 0xBF, 0x2B, 0xEF, 0x42, 0xCE,
        /* Tamil range */
        0xEF, 0x44, 0xA5, 0xA6, 0xA7,
        /* Telugu range */
        0xEF, 0x45, 0xA4, 0xA2, 0xA3, 0xD0,
        /* Kannada range */
        0xEF, 0x48, 0xA4, 0xA2, 0xA3,
        /* anudatta and abbreviation sign */
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,

        0xAA, 0xE9, /* RI + NUKTA 0x0960 */
        0xDF, 0xE9, /* Vowel sign RI + NUKTA 0x0944 */
        0xA6, 0xE9, /* Vowel I + NUKTA 0x090C */
        0xDB, 0xE9, /* Vowel sign I + Nukta 0x0962 */
        0xA7, 0xE9, /* Vowel II + NUKTA 0x0961 */
        0xDC, 0xE9, /* Vowel sign II + Nukta 0x0963 */
        0xA1, 0xE9, /* chandrabindu + Nukta 0x0950 */
        0xEA, 0xE9, /* Danda + Nukta 0x093D */
        0xB3, 0xE9, /* Ka + NUKTA */
        0xB4, 0xE9, /* Kha + NUKTA */
        0xB5, 0xE9, /* Ga + NUKTA */
        0xBA, 0xE9,
        0xBF, 0xE9,
        0xC0, 0xE9,
        0xC9, 0xE9,
        /* INV halant RA */
        0xD9, 0xE8, 0xCF,
        0x00, 0xA0,
        /* just consume unhandled codepoints */
        0xEF, 0x30
    };
    const int32_t unicodeLen = UPRV_LENGTHOF(unicodeText);
    const int32_t isciiLen = (int32_t)sizeof(isciiBytes);

    testConvertToU(isciiBytes, isciiLen, unicodeText, unicodeLen, "x-iscii-de", NULL, true);
    TestConv(unicodeText, unicodeLen, "ISCII,version=0", "hindi", (char *)isciiBytes, isciiLen);
}
3304 
3305 static void
TestISO_2022_JP(void)3306 TestISO_2022_JP(void) {
3307     /* test input */
3308     static const uint16_t in[]={
3309         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3310         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3311         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3312         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3313         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3314         0x201D, 0x3014, 0x000D, 0x000A,
3315         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3316         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3317         };
3318     const UChar* uSource;
3319     const UChar* uSourceLimit;
3320     const char* cSource;
3321     const char* cSourceLimit;
3322     UChar *uTargetLimit =NULL;
3323     UChar *uTarget;
3324     char *cTarget;
3325     const char *cTargetLimit;
3326     char *cBuf = NULL;
3327     UChar *uBuf = NULL;
3328     UChar *test;
3329     int32_t uBufSize = 120;
3330     UErrorCode errorCode=U_ZERO_ERROR;
3331     UConverter *cnv = NULL;
3332     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3333     int32_t* myOff= offsets;
3334     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3335     if(U_FAILURE(errorCode)) {
3336         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3337         goto cleanup;
3338     }
3339 
3340     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3341     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3342     uSource = (const UChar*)in;
3343     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3344     cTarget = cBuf;
3345     cTargetLimit = cBuf +uBufSize*5;
3346     uTarget = uBuf;
3347     uTargetLimit = uBuf+ uBufSize*5;
3348     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3349     if(U_FAILURE(errorCode)){
3350         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3351         goto cleanup;
3352     }
3353     cSource = cBuf;
3354     cSourceLimit =cTarget;
3355     test =uBuf;
3356     myOff=offsets;
3357     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3358     if(U_FAILURE(errorCode)){
3359         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3360         goto cleanup;
3361     }
3362 
3363     uSource = (const UChar*)in;
3364     while(uSource<uSourceLimit){
3365         if(*test!=*uSource){
3366 
3367             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3368         }
3369         uSource++;
3370         test++;
3371     }
3372 
3373     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3374     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3375     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3376     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3377     TestJitterbug930("csISO2022JP");
3378 
3379 cleanup:
3380     ucnv_close(cnv);
3381     free(uBuf);
3382     free(cBuf);
3383     free(offsets);
3384 }
3385 
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3386 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3387     const UChar* uSource;
3388     const UChar* uSourceLimit;
3389     const char* cSource;
3390     const char* cSourceLimit;
3391     UChar *uTargetLimit =NULL;
3392     UChar *uTarget;
3393     char *cTarget;
3394     const char *cTargetLimit;
3395     char *cBuf;
3396     UChar *uBuf,*test;
3397     int32_t uBufSize = 120*10;
3398     UErrorCode errorCode=U_ZERO_ERROR;
3399     UConverter *cnv;
3400     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3401     int32_t* myOff= offsets;
3402     cnv=my_ucnv_open(conv, &errorCode);
3403     if(U_FAILURE(errorCode)) {
3404         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3405         return;
3406     }
3407 
3408     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3409     cBuf =(char*)malloc(uBufSize * sizeof(char));
3410     uSource = (const UChar*)in;
3411     uSourceLimit=uSource+len;
3412     cTarget = cBuf;
3413     cTargetLimit = cBuf +uBufSize;
3414     uTarget = uBuf;
3415     uTargetLimit = uBuf+ uBufSize;
3416     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3417     if(U_FAILURE(errorCode)){
3418         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3419         return;
3420     }
3421     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3422     cSource = cBuf;
3423     cSourceLimit =cTarget;
3424     test =uBuf;
3425     myOff=offsets;
3426     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3427     if(U_FAILURE(errorCode)){
3428         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3429         return;
3430     }
3431 
3432     uSource = (const UChar*)in;
3433     while(uSource<uSourceLimit){
3434         if(*test!=*uSource){
3435             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3436         }
3437         uSource++;
3438         test++;
3439     }
3440     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3441     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3442     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3443     if(byteArr && byteArrLen!=0){
3444         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3445         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3446         {
3447             cSource = byteArr;
3448             cSourceLimit = cSource+byteArrLen;
3449             test=uBuf;
3450             myOff = offsets;
3451             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3452             if(U_FAILURE(errorCode)){
3453                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3454                 return;
3455             }
3456 
3457             uSource = (const UChar*)in;
3458             while(uSource<uSourceLimit){
3459                 if(*test!=*uSource){
3460                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3461                 }
3462                 uSource++;
3463                 test++;
3464             }
3465         }
3466     }
3467 
3468     ucnv_close(cnv);
3469     free(uBuf);
3470     free(cBuf);
3471     free(offsets);
3472 }
3473 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3474 _charAt(int32_t offset, void *context) {
3475     return ((char*)context)[offset];
3476 }
3477 
3478 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3479 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3480     int32_t srcIndex=0;
3481     int32_t dstIndex=0;
3482     if(U_FAILURE(*status)){
3483         return 0;
3484     }
3485     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3486         *status = U_ILLEGAL_ARGUMENT_ERROR;
3487         return 0;
3488     }
3489     if(srcLen==-1){
3490         srcLen = (int32_t)uprv_strlen(src);
3491     }
3492 
3493     for (; srcIndex<srcLen; ) {
3494         UChar32 c = src[srcIndex++];
3495         if (c == 0x005C /*'\\'*/) {
3496             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3497             if (c == (UChar32)0xFFFFFFFF) {
3498                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3499                 break; /* invalid escape sequence */
3500             }
3501         }
3502         if(dstIndex < dstLen){
3503             if(c>0xFFFF){
3504                dst[dstIndex++] = U16_LEAD(c);
3505                if(dstIndex<dstLen){
3506                     dst[dstIndex]=U16_TRAIL(c);
3507                }else{
3508                    *status=U_BUFFER_OVERFLOW_ERROR;
3509                }
3510             }else{
3511                 dst[dstIndex]=(UChar)c;
3512             }
3513 
3514         }else{
3515             *status = U_BUFFER_OVERFLOW_ERROR;
3516         }
3517         dstIndex++; /* for preflighting */
3518     }
3519     return dstIndex;
3520 }
3521 
3522 static void
TestFullRoundtrip(const char * cp)3523 TestFullRoundtrip(const char* cp){
3524     UChar usource[10] ={0};
3525     UChar nsrc[10] = {0};
3526     uint32_t i=1;
3527     int len=0, ulen;
3528     nsrc[0]=0x0061;
3529     /* Test codepoint 0 */
3530     TestConv(usource,1,cp,"",NULL,0);
3531     TestConv(usource,2,cp,"",NULL,0);
3532     nsrc[2]=0x5555;
3533     TestConv(nsrc,3,cp,"",NULL,0);
3534 
3535     for(;i<=0x10FFFF;i++){
3536         if(i==0xD800){
3537             i=0xDFFF;
3538             continue;
3539         }
3540         if(i<=0xFFFF){
3541             usource[0] =(UChar) i;
3542             len=1;
3543         }else{
3544             usource[0]=U16_LEAD(i);
3545             usource[1]=U16_TRAIL(i);
3546             len=2;
3547         }
3548         ulen=len;
3549         if(i==0x80) {
3550             usource[2]=0;
3551         }
3552         /* Test only single code points */
3553         TestConv(usource,ulen,cp,"",NULL,0);
3554         /* Test codepoint repeated twice */
3555         usource[ulen]=usource[0];
3556         usource[ulen+1]=usource[1];
3557         ulen+=len;
3558         TestConv(usource,ulen,cp,"",NULL,0);
3559         /* Test codepoint repeated 3 times */
3560         usource[ulen]=usource[0];
3561         usource[ulen+1]=usource[1];
3562         ulen+=len;
3563         TestConv(usource,ulen,cp,"",NULL,0);
3564         /* Test codepoint in between 2 codepoints */
3565         nsrc[1]=usource[0];
3566         nsrc[2]=usource[1];
3567         nsrc[len+1]=0x5555;
3568         TestConv(nsrc,len+2,cp,"",NULL,0);
3569         uprv_memset(usource,0,sizeof(UChar)*10);
3570     }
3571 }
3572 
3573 static void
TestRoundTrippingAllUTF(void)3574 TestRoundTrippingAllUTF(void){
3575     if(!getTestOption(QUICK_OPTION)){
3576         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3577         TestFullRoundtrip("BOCU-1");
3578         log_verbose("Running exhaustive round trip test for SCSU\n");
3579         TestFullRoundtrip("SCSU");
3580         log_verbose("Running exhaustive round trip test for UTF-8\n");
3581         TestFullRoundtrip("UTF-8");
3582         log_verbose("Running exhaustive round trip test for CESU-8\n");
3583         TestFullRoundtrip("CESU-8");
3584         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3585         TestFullRoundtrip("UTF-16BE");
3586         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3587         TestFullRoundtrip("UTF-16LE");
3588         log_verbose("Running exhaustive round trip test for UTF-16\n");
3589         TestFullRoundtrip("UTF-16");
3590         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3591         TestFullRoundtrip("UTF-32BE");
3592         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3593         TestFullRoundtrip("UTF-32LE");
3594         log_verbose("Running exhaustive round trip test for UTF-32\n");
3595         TestFullRoundtrip("UTF-32");
3596         log_verbose("Running exhaustive round trip test for UTF-7\n");
3597         TestFullRoundtrip("UTF-7");
3598         log_verbose("Running exhaustive round trip test for UTF-7\n");
3599         TestFullRoundtrip("UTF-7,version=1");
3600         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3601         TestFullRoundtrip("IMAP-mailbox-name");
3602         /*
3603          *
3604          * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3605          * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3606          * The old mappings remain as fallbacks.
3607          * This test may be reintroduced at a later time.
3608          *
3609          * 110118 - mow
3610          */
3611          /*
3612          log_verbose("Running exhaustive round trip test for GB18030\n");
3613          TestFullRoundtrip("GB18030");
3614          */
3615     }
3616 }
3617 
3618 static void
TestSCSU(void)3619 TestSCSU(void) {
3620 
3621     static const uint16_t germanUTF16[]={
3622         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3623     };
3624 
3625     static const uint8_t germanSCSU[]={
3626         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3627     };
3628 
3629     static const uint16_t russianUTF16[]={
3630         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3631     };
3632 
3633     static const uint8_t russianSCSU[]={
3634         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3635     };
3636 
3637     static const uint16_t japaneseUTF16[]={
3638         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3639         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3640         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3641         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3642         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3643         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3644         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3645         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3646         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3647         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3648         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3649         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3650         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3651         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3652         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3653     };
3654 
3655     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3656      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3657     static const uint8_t japaneseSCSU[]={
3658         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3659         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3660         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3661         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3662         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3663         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3664         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3665         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3666         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3667         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3668         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3669         0xcb, 0x82
3670     };
3671 
3672     static const uint16_t allFeaturesUTF16[]={
3673         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3674         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3675         0x01df, 0xf000, 0xdbff, 0xdfff
3676     };
3677 
3678     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3679      * result here (34B vs. 35B)
3680      */
3681     static const uint8_t allFeaturesSCSU[]={
3682         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3683         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3684         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3685         0xdf, 0x14, 0x80, 0x15, 0xff
3686     };
3687     static const uint16_t monkeyIn[]={
3688         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3689         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3690         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3691         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3692         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3693         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3694         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3695         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3696         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3697         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3698         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3699         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3700         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3701         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3702         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3703         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3704         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3705         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3706         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3707         /* test non-BMP code points */
3708         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3709         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3710         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3711         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3712         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3713         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3714         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3715         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3716         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3717         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3718         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3719 
3720 
3721         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3722         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3723         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3724         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3725         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3726     };
3727     static const char *fTestCases [] = {
3728           "\\ud800\\udc00", /* smallest surrogate*/
3729           "\\ud8ff\\udcff",
3730           "\\udBff\\udFff", /* largest surrogate pair*/
3731           "\\ud834\\udc00",
3732           "\\U0010FFFF",
3733           "Hello \\u9292 \\u9192 World!",
3734           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3735           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3736 
3737           "\\u0648\\u06c8", /* catch missing reset*/
3738           "\\u0648\\u06c8",
3739 
3740           "\\u4444\\uE001", /* lowest quotable*/
3741           "\\u4444\\uf2FF", /* highest quotable*/
3742           "\\u4444\\uf188\\u4444",
3743           "\\u4444\\uf188\\uf288",
3744           "\\u4444\\uf188abc\\u0429\\uf288",
3745           "\\u9292\\u2222",
3746           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3747           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3748           "Hello World!123456",
3749           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3750 
3751           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3752           "abc\\u4411d",      /* uses SQU*/
3753           "abc\\u4411\\u4412d",/* uses SCU*/
3754           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3755           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3756           "\\u9292\\u2222",
3757           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3758           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3759           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3760 
3761           "", /* empty input*/
3762           "\\u0000", /* smallest BMP character*/
3763           "\\uFFFF", /* largest BMP character*/
3764 
3765           /* regression tests*/
3766           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3767           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3768           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3769           "\\u0041\\u00df\\u0401\\u015f",
3770           "\\u9066\\u2123abc",
3771           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3772           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3773     };
3774     int i=0;
3775     for(;i<UPRV_LENGTHOF(fTestCases);i++){
3776         const char* cSrc = fTestCases[i];
3777         UErrorCode status = U_ZERO_ERROR;
3778         int32_t cSrcLen,srcLen;
3779         UChar* src;
3780         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3781         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3782         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3783         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3784         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3785         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3786         free(src);
3787     }
3788     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3789     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3790     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3791     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3792     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3793     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3794     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3795 }
3796 
3797 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346(void)3798 static void TestJitterbug2346(void){
3799     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3800                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3801     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3802 
3803     UChar uTarget[500]={'\0'};
3804     UChar* utarget=uTarget;
3805     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3806 
3807     char cTarget[500]={'\0'};
3808     char* ctarget=cTarget;
3809     char* ctargetLimit=cTarget+sizeof(cTarget);
3810     const char* csource=source;
3811     UChar* temp = expected;
3812     UErrorCode err=U_ZERO_ERROR;
3813 
3814     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3815     if(U_FAILURE(err)) {
3816         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3817         return;
3818     }
3819     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,true,&err);
3820     if(U_FAILURE(err)) {
3821         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3822         return;
3823     }
3824     utargetLimit=utarget;
3825     utarget = uTarget;
3826     while(utarget<utargetLimit){
3827         if(*temp!=*utarget){
3828 
3829             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3830         }
3831         utarget++;
3832         temp++;
3833     }
3834     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
3835     if(U_FAILURE(err)) {
3836         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3837         return;
3838     }
3839     ctargetLimit=ctarget;
3840     ctarget =cTarget;
3841     ucnv_close(conv);
3842 
3843 
3844 }
3845 
3846 static void
TestISO_2022_JP_1(void)3847 TestISO_2022_JP_1(void) {
3848     /* test input */
3849     static const uint16_t in[]={
3850         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3851         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3852         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3853         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3854         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3855         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3856         0x201D, 0x000D, 0x000A,
3857         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3858         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3859         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3860         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3861         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3862         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3863       };
3864     const UChar* uSource;
3865     const UChar* uSourceLimit;
3866     const char* cSource;
3867     const char* cSourceLimit;
3868     UChar *uTargetLimit =NULL;
3869     UChar *uTarget;
3870     char *cTarget;
3871     const char *cTargetLimit;
3872     char *cBuf;
3873     UChar *uBuf,*test;
3874     int32_t uBufSize = 120;
3875     UErrorCode errorCode=U_ZERO_ERROR;
3876     UConverter *cnv;
3877 
3878     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3879     if(U_FAILURE(errorCode)) {
3880         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3881         return;
3882     }
3883 
3884     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3885     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3886     uSource = (const UChar*)in;
3887     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3888     cTarget = cBuf;
3889     cTargetLimit = cBuf +uBufSize*5;
3890     uTarget = uBuf;
3891     uTargetLimit = uBuf+ uBufSize*5;
3892     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,true, &errorCode);
3893     if(U_FAILURE(errorCode)){
3894         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3895         return;
3896     }
3897     cSource = cBuf;
3898     cSourceLimit =cTarget;
3899     test =uBuf;
3900     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,true,&errorCode);
3901     if(U_FAILURE(errorCode)){
3902         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3903         return;
3904     }
3905     uSource = (const UChar*)in;
3906     while(uSource<uSourceLimit){
3907         if(*test!=*uSource){
3908 
3909             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3910         }
3911         uSource++;
3912         test++;
3913     }
3914     /*ucnv_close(cnv);
3915     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3916     /*Test for the condition where there is an invalid character*/
3917     ucnv_reset(cnv);
3918     {
3919         static const uint8_t source2[]={0x0e,0x24,0x053};
3920         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3921     }
3922     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3923     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3924     ucnv_close(cnv);
3925     free(uBuf);
3926     free(cBuf);
3927 }
3928 
3929 static void
TestISO_2022_JP_2(void)3930 TestISO_2022_JP_2(void) {
3931     /* test input */
3932     static const uint16_t in[]={
3933         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3934         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3935         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3936         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3937         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3938         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3939         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3940         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3941         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3942         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3943         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3944         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3945         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3946         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3947         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3948         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3949         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3950         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3951         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3952       };
3953     const UChar* uSource;
3954     const UChar* uSourceLimit;
3955     const char* cSource;
3956     const char* cSourceLimit;
3957     UChar *uTargetLimit =NULL;
3958     UChar *uTarget;
3959     char *cTarget;
3960     const char *cTargetLimit;
3961     char *cBuf = NULL;
3962     UChar *uBuf = NULL;
3963     UChar *test;
3964     int32_t uBufSize = 120;
3965     UErrorCode errorCode=U_ZERO_ERROR;
3966     UConverter *cnv = NULL;
3967     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3968     int32_t* myOff= offsets;
3969     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3970     if(U_FAILURE(errorCode)) {
3971         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3972         goto cleanup;
3973     }
3974 
3975     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3976     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3977     uSource = (const UChar*)in;
3978     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3979     cTarget = cBuf;
3980     cTargetLimit = cBuf +uBufSize*5;
3981     uTarget = uBuf;
3982     uTargetLimit = uBuf+ uBufSize*5;
3983     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3984     if(U_FAILURE(errorCode)){
3985         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3986         goto cleanup;
3987     }
3988     cSource = cBuf;
3989     cSourceLimit =cTarget;
3990     test =uBuf;
3991     myOff=offsets;
3992     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3993     if(U_FAILURE(errorCode)){
3994         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3995         goto cleanup;
3996     }
3997     uSource = (const UChar*)in;
3998     while(uSource<uSourceLimit){
3999         if(*test!=*uSource){
4000 
4001             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4002         }
4003         uSource++;
4004         test++;
4005     }
4006     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4007     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4008     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4009     /*Test for the condition where there is an invalid character*/
4010     ucnv_reset(cnv);
4011     {
4012         static const uint8_t source2[]={0x0e,0x24,0x053};
4013         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4014     }
4015 
4016 cleanup:
4017     ucnv_close(cnv);
4018     free(uBuf);
4019     free(cBuf);
4020     free(offsets);
4021 }
4022 
4023 static void
TestISO_2022_KR(void)4024 TestISO_2022_KR(void) {
4025     /* test input */
4026     static const uint16_t in[]={
4027                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4028                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4029                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4030                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4031                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4032                    ,0x53E3,0x53E4,0x000A,0x000D};
4033     const UChar* uSource;
4034     const UChar* uSourceLimit;
4035     const char* cSource;
4036     const char* cSourceLimit;
4037     UChar *uTargetLimit =NULL;
4038     UChar *uTarget;
4039     char *cTarget;
4040     const char *cTargetLimit;
4041     char *cBuf = NULL;
4042     UChar *uBuf = NULL;
4043     UChar *test;
4044     int32_t uBufSize = 120;
4045     UErrorCode errorCode=U_ZERO_ERROR;
4046     UConverter *cnv = NULL;
4047     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4048     int32_t* myOff= offsets;
4049     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4050     if(U_FAILURE(errorCode)) {
4051         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4052         goto cleanup;
4053     }
4054 
4055     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4056     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4057     uSource = (const UChar*)in;
4058     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4059     cTarget = cBuf;
4060     cTargetLimit = cBuf +uBufSize*5;
4061     uTarget = uBuf;
4062     uTargetLimit = uBuf+ uBufSize*5;
4063     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4064     if(U_FAILURE(errorCode)){
4065         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4066         goto cleanup;
4067     }
4068     cSource = cBuf;
4069     cSourceLimit =cTarget;
4070     test =uBuf;
4071     myOff=offsets;
4072     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4073     if(U_FAILURE(errorCode)){
4074         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4075         goto cleanup;
4076     }
4077     uSource = (const UChar*)in;
4078     while(uSource<uSourceLimit){
4079         if(*test!=*uSource){
4080             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4081         }
4082         uSource++;
4083         test++;
4084     }
4085     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4086     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4087     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4088     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4089     TestJitterbug930("csISO2022KR");
4090     /*Test for the condition where there is an invalid character*/
4091     ucnv_reset(cnv);
4092     {
4093         static const uint8_t source2[]={0x1b,0x24,0x053};
4094         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4095         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4096     }
4097 
4098 cleanup:
4099     ucnv_close(cnv);
4100     free(uBuf);
4101     free(cBuf);
4102     free(offsets);
4103 }
4104 
4105 static void
TestISO_2022_KR_1(void)4106 TestISO_2022_KR_1(void) {
4107     /* test input */
4108     static const uint16_t in[]={
4109                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4110                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4111                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4112                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4113                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4114                    ,0x53E3,0x53E4,0x000A,0x000D};
4115     const UChar* uSource;
4116     const UChar* uSourceLimit;
4117     const char* cSource;
4118     const char* cSourceLimit;
4119     UChar *uTargetLimit =NULL;
4120     UChar *uTarget;
4121     char *cTarget;
4122     const char *cTargetLimit;
4123     char *cBuf = NULL;
4124     UChar *uBuf = NULL;
4125     UChar *test;
4126     int32_t uBufSize = 120;
4127     UErrorCode errorCode=U_ZERO_ERROR;
4128     UConverter *cnv = NULL;
4129     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4130     int32_t* myOff= offsets;
4131     cnv=ucnv_open("ibm-25546", &errorCode);
4132     if(U_FAILURE(errorCode)) {
4133         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4134         goto cleanup;
4135     }
4136 
4137     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4138     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4139     uSource = (const UChar*)in;
4140     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4141     cTarget = cBuf;
4142     cTargetLimit = cBuf +uBufSize*5;
4143     uTarget = uBuf;
4144     uTargetLimit = uBuf+ uBufSize*5;
4145     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4146     if(U_FAILURE(errorCode)){
4147         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4148         goto cleanup;
4149     }
4150     cSource = cBuf;
4151     cSourceLimit =cTarget;
4152     test =uBuf;
4153     myOff=offsets;
4154     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4155     if(U_FAILURE(errorCode)){
4156         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4157         goto cleanup;
4158     }
4159     uSource = (const UChar*)in;
4160     while(uSource<uSourceLimit){
4161         if(*test!=*uSource){
4162             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4163         }
4164         uSource++;
4165         test++;
4166     }
4167     ucnv_reset(cnv);
4168     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4169     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4170     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4171     ucnv_reset(cnv);
4172     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4173         /*Test for the condition where there is an invalid character*/
4174     ucnv_reset(cnv);
4175     {
4176         static const uint8_t source2[]={0x1b,0x24,0x053};
4177         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4178         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4179     }
4180 
4181 cleanup:
4182     ucnv_close(cnv);
4183     free(uBuf);
4184     free(cBuf);
4185     free(offsets);
4186 }
4187 
TestJitterbug2411(void)4188 static void TestJitterbug2411(void){
4189     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4190                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4191     UConverter* kr=NULL, *kr1=NULL;
4192     UErrorCode errorCode = U_ZERO_ERROR;
4193     UChar tgt[100]={'\0'};
4194     UChar* target = tgt;
4195     UChar* targetLimit = target+100;
4196     kr=ucnv_open("iso-2022-kr", &errorCode);
4197     if(U_FAILURE(errorCode)) {
4198         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4199         return;
4200     }
4201     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4202     if(U_FAILURE(errorCode)) {
4203         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4204         return;
4205     }
4206     kr1 = ucnv_open("ibm-25546", &errorCode);
4207     if(U_FAILURE(errorCode)) {
4208         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4209         return;
4210     }
4211     target = tgt;
4212     targetLimit = target+100;
4213     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4214 
4215     if(U_FAILURE(errorCode)) {
4216         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4217         return;
4218     }
4219 
4220     ucnv_close(kr);
4221     ucnv_close(kr1);
4222 
4223 }
4224 
/*
 * To-Unicode tests for the "JIS", "JIS7" and "JIS8" converter names.
 *
 * Each case supplies an input byte sequence, the expected UTF-16 code units
 * and the expected offset for each code unit, and hands them to
 * testConvertToU().
 */
static void
TestJIS(void){
    /* From Unicode moved to testdata/conversion.txt */
    /*To Unicode*/

    /* ---- "JIS" ---- */
    static const uint8_t jisBytes[] = {
        0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
        0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
    };
    static const uint16_t jisExpected[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000
    };
    static const int32_t jisOffsets[] = {
        3,4,
        8,9,
        16
    };

    /* ---- "JIS7" ---- */
    static const uint8_t jis7Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
        0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
        0x1b,0x24,0x42,0x21,0x21,
        0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
        0x21,0x22,
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
    };
    static const uint16_t jis7Expected[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000,
        0xFF81, 0xFF82,
        0x3001,
        0x3000
    };
    static const int32_t jis7Offsets[] = {
        3,4,
        8,9,
        13,16,
        17,
        19,27
    };

    /* ---- "JIS8" ---- */
    static const uint8_t jis8Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
        0xa1,0xc8,0xd9,/*Katakana Set*/
        0x1b,0x28,0x42,
        0x41,0x42,
        0xb1,0xc3, /*Katakana Set*/
        0x1b,0x24,0x42,0x21,0x21
    };
    static const uint16_t jis8Expected[] = {
        0x0041, 0x0042,
        0xff61, 0xff88, 0xff99,
        0x0041, 0x0042,
        0xff71, 0xff83,
        0x3000
    };
    static const int32_t jis8Offsets[] = {
        3, 4,  5,  6,
        7, 11, 12, 13,
        14, 18,
    };

    testConvertToU(jisBytes, sizeof(jisBytes),
                   jisExpected, UPRV_LENGTHOF(jisExpected),
                   "JIS", jisOffsets, true);
    testConvertToU(jis7Bytes, sizeof(jis7Bytes),
                   jis7Expected, UPRV_LENGTHOF(jis7Expected),
                   "JIS7", jis7Offsets, true);
    testConvertToU(jis8Bytes, sizeof(jis8Bytes),
                   jis8Expected, UPRV_LENGTHOF(jis8Expected),
                   "JIS8", jis8Offsets, true);
}
4299 
4300 
4301 #if 0
4302  ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4303 
4304 static void TestJitterbug915(){
4305 /* tests for roundtripping of the below sequence
4306 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4307 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4308 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4309 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4310 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4311 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4312 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4313 */
4314     static const char cSource[]={
4315         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4316         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4317         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4318         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4319         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4320         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4321         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4322         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4323         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4324         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4325         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4326         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4327         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4328         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4329         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4330         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4331         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4332         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4333         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4334         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4335         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4336         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4337         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4338         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4339         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4340         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4341         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4342         0x37, 0x20, 0x2A, 0x2F
4343     };
4344     UChar uTarget[500]={'\0'};
4345     UChar* utarget=uTarget;
4346     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4347 
4348     char cTarget[500]={'\0'};
4349     char* ctarget=cTarget;
4350     char* ctargetLimit=cTarget+sizeof(cTarget);
4351     const char* csource=cSource;
4352     const char* tempSrc = cSource;
4353     UErrorCode err=U_ZERO_ERROR;
4354 
4355     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4356     if(U_FAILURE(err)) {
4357         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4358         return;
4359     }
4360     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,true,&err);
4361     if(U_FAILURE(err)) {
4362         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4363         return;
4364     }
4365     utargetLimit=utarget;
4366     utarget = uTarget;
4367     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
4368     if(U_FAILURE(err)) {
4369         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4370         return;
4371     }
4372     ctargetLimit=ctarget;
4373     ctarget =cTarget;
4374     while(ctarget<ctargetLimit){
4375         if(*ctarget != *tempSrc){
4376             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4377         }
4378         ++ctarget;
4379         ++tempSrc;
4380     }
4381 
4382     ucnv_close(conv);
4383 }
4384 
4385 static void
4386 TestISO_2022_CN_EXT() {
4387     /* test input */
4388     static const uint16_t in[]={
4389                 /* test Non-BMP code points */
4390          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4391          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4392          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4393          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4394          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4395          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4396          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4397          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4398          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4399          0xD869, 0xDED5,
4400 
4401          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4402          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4403          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4404          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4405          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4406          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4407          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4408          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4409          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4410          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4411          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4412          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4413          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4414          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4415          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4416          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4417          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4418          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4419 
4420          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4421 
4422       };
4423 
4424     const UChar* uSource;
4425     const UChar* uSourceLimit;
4426     const char* cSource;
4427     const char* cSourceLimit;
4428     UChar *uTargetLimit =NULL;
4429     UChar *uTarget;
4430     char *cTarget;
4431     const char *cTargetLimit;
4432     char *cBuf = NULL;
4433     UChar *uBuf = NULL;
4434     UChar *test;
4435     int32_t uBufSize = 180;
4436     UErrorCode errorCode=U_ZERO_ERROR;
4437     UConverter *cnv = NULL;
4438     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4439     int32_t* myOff= offsets;
4440     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4441     if(U_FAILURE(errorCode)) {
4442         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4443         goto cleanup;
4444     }
4445 
4446     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4447     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4448     uSource = (const UChar*)in;
4449     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4450     cTarget = cBuf;
4451     cTargetLimit = cBuf +uBufSize*5;
4452     uTarget = uBuf;
4453     uTargetLimit = uBuf+ uBufSize*5;
4454     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4455     if(U_FAILURE(errorCode)){
4456         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4457         goto cleanup;
4458     }
4459     cSource = cBuf;
4460     cSourceLimit =cTarget;
4461     test =uBuf;
4462     myOff=offsets;
4463     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4464     if(U_FAILURE(errorCode)){
4465         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4466         goto cleanup;
4467     }
4468     uSource = (const UChar*)in;
4469     while(uSource<uSourceLimit){
4470         if(*test!=*uSource){
4471             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4472         }
4473         else{
4474             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4475         }
4476         uSource++;
4477         test++;
4478     }
4479     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4480     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4481     /*Test for the condition where there is an invalid character*/
4482     ucnv_reset(cnv);
4483     {
4484         static const uint8_t source2[]={0x0e,0x24,0x053};
4485         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4486     }
4487 
4488 cleanup:
4489     ucnv_close(cnv);
4490     free(uBuf);
4491     free(cBuf);
4492     free(offsets);
4493 }
4494 #endif
4495 
4496 static void
TestISO_2022_CN(void)4497 TestISO_2022_CN(void) {
4498     /* test input */
4499     static const uint16_t in[]={
4500          /* jitterbug 951 */
4501          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4502          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4503          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4504          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4505          0x0020, 0x0045, 0x004e, 0x0044,
4506          /**/
4507          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4508          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4509          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4510          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4511          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4512          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4513          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4514          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4515          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4516          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4517          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4518          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4519          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4520          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4521          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4522          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4523          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4524 
4525       };
4526     const UChar* uSource;
4527     const UChar* uSourceLimit;
4528     const char* cSource;
4529     const char* cSourceLimit;
4530     UChar *uTargetLimit =NULL;
4531     UChar *uTarget;
4532     char *cTarget;
4533     const char *cTargetLimit;
4534     char *cBuf = NULL;
4535     UChar *uBuf = NULL;
4536     UChar *test;
4537     int32_t uBufSize = 180;
4538     UErrorCode errorCode=U_ZERO_ERROR;
4539     UConverter *cnv = NULL;
4540     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4541     int32_t* myOff= offsets;
4542     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4543     if(U_FAILURE(errorCode)) {
4544         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4545         goto cleanup;
4546     }
4547 
4548     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4549     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4550     uSource = (const UChar*)in;
4551     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4552     cTarget = cBuf;
4553     cTargetLimit = cBuf +uBufSize*5;
4554     uTarget = uBuf;
4555     uTargetLimit = uBuf+ uBufSize*5;
4556     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4557     if(U_FAILURE(errorCode)){
4558         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4559         goto cleanup;
4560     }
4561     cSource = cBuf;
4562     cSourceLimit =cTarget;
4563     test =uBuf;
4564     myOff=offsets;
4565     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4566     if(U_FAILURE(errorCode)){
4567         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4568         goto cleanup;
4569     }
4570     uSource = (const UChar*)in;
4571     while(uSource<uSourceLimit){
4572         if(*test!=*uSource){
4573             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4574         }
4575         else{
4576             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4577         }
4578         uSource++;
4579         test++;
4580     }
4581     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4582     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4583     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4584     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4585     TestJitterbug930("csISO2022CN");
4586     /*Test for the condition where there is an invalid character*/
4587     ucnv_reset(cnv);
4588     {
4589         static const uint8_t source2[]={0x0e,0x24,0x053};
4590         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4591     }
4592 
4593 cleanup:
4594     ucnv_close(cnv);
4595     free(uBuf);
4596     free(cBuf);
4597     free(offsets);
4598 }
4599 
/*
 * One test case for the empty-segment tests (ISO-2022-JP/KR/CN, HZ), which
 * check that the UConverterCallbackReason reported is UCNV_IRREGULAR.
 * Field order matters: the test table uses positional initializers.
 */
typedef struct EmptySegmentTest {
    const char *converterName;    /* name passed to ucnv_open() */
    const char *inputText;        /* raw input bytes for the converter */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4606 
4607 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4608 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4609                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4610     // suppress compiler warnings about unused variables
4611     (void)context;
4612     (void)codeUnits;
4613     (void)length;
4614     if (reason > UCNV_IRREGULAR) {
4615         return;
4616     }
4617     if (reason != UCNV_IRREGULAR) {
4618         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4619     }
4620     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4621     *err = U_ZERO_ERROR;
4622     ucnv_cbToUWriteSub(toArgs,0,err);
4623 }
4624 
4625 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4626 static void TestJitterbug6175(void) {
4627     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4628     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4629     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4630     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4631     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4632     static const EmptySegmentTest emptySegmentTests[] = {
4633         /* converterName inputText    inputTextLength */
4634         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4635         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4636         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4637         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4638         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4639         /* terminator: */
4640         { NULL,          NULL,        0,                  }
4641     };
4642     const EmptySegmentTest * testPtr;
4643     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4644         UErrorCode   err = U_ZERO_ERROR;
4645         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4646         if (U_FAILURE(err)) {
4647             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4648             return;
4649         }
4650         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4651         if (U_FAILURE(err)) {
4652             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4653             ucnv_close(cnv);
4654             return;
4655         }
4656         {
4657             UChar         toUChars[kEmptySegmentToUCharsMax];
4658             UChar *       toUCharsPtr = toUChars;
4659             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4660             const char *  inCharsPtr = testPtr->inputText;
4661             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4662             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, true, &err);
4663         }
4664         ucnv_close(cnv);
4665     }
4666 }
4667 
4668 static void
TestEBCDIC_STATEFUL(void)4669 TestEBCDIC_STATEFUL(void) {
4670     /* test input */
4671     static const uint8_t in[]={
4672         0x61,
4673         0x1a,
4674         0x0f, 0x4b,
4675         0x42,
4676         0x40,
4677         0x36,
4678     };
4679 
4680     /* expected test results */
4681     static const int32_t results[]={
4682         /* number of bytes read, code point */
4683         1, 0x002f,
4684         1, 0x0092,
4685         2, 0x002e,
4686         1, 0xff62,
4687         1, 0x0020,
4688         1, 0x0096,
4689 
4690     };
4691     static const uint8_t in2[]={
4692         0x0f,
4693         0xa1,
4694         0x01
4695     };
4696 
4697     /* expected test results */
4698     static const int32_t results2[]={
4699         /* number of bytes read, code point */
4700         2, 0x203E,
4701         1, 0x0001,
4702     };
4703 
4704     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4705     UErrorCode errorCode=U_ZERO_ERROR;
4706     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4707     if(U_FAILURE(errorCode)) {
4708         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4709         return;
4710     }
4711     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4712     ucnv_reset(cnv);
4713      /* Test the condition when source >= sourceLimit */
4714     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4715     ucnv_reset(cnv);
4716     /*Test for the condition where source > sourcelimit after consuming the shift character */
4717     {
4718         static const uint8_t source1[]={0x0f};
4719         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4720     }
4721     /*Test for the condition where there is an invalid character*/
4722     ucnv_reset(cnv);
4723     {
4724         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4725         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4726     }
4727     ucnv_reset(cnv);
4728     source=(const char*)in2;
4729     limit=(const char*)in2+sizeof(in2);
4730     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4731     ucnv_close(cnv);
4732 
4733 }
4734 
4735 static void
TestGB18030(void)4736 TestGB18030(void) {
4737     /* test input */
4738     static const uint8_t in[]={
4739         0x24,
4740         0x7f,
4741         0x81, 0x30, 0x81, 0x30,
4742         0xa8, 0xbf,
4743         0xa2, 0xe3,
4744         0xd2, 0xbb,
4745         0x82, 0x35, 0x8f, 0x33,
4746         0x84, 0x31, 0xa4, 0x39,
4747         0x90, 0x30, 0x81, 0x30,
4748         0xe3, 0x32, 0x9a, 0x35
4749 #if 0
4750         /*
4751          * Feature removed   markus 2000-oct-26
4752          * Only some codepages must match surrogate pairs into supplementary code points -
4753          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4754          * GB 18030 provides direct encodings for supplementary code points, therefore
4755          * it must not combine two single-encoded surrogates into one code point.
4756          */
4757         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4758 #endif
4759     };
4760 
4761     /* expected test results */
4762     static const int32_t results[]={
4763         /* number of bytes read, code point */
4764         1, 0x24,
4765         1, 0x7f,
4766         4, 0x80,
4767         2, 0x1f9,
4768         2, 0x20ac,
4769         2, 0x4e00,
4770         4, 0x9fa6,
4771         4, 0xffff,
4772         4, 0x10000,
4773         4, 0x10ffff
4774 #if 0
4775         /* Feature removed. See comment above. */
4776         8, 0x10000
4777 #endif
4778     };
4779 
4780 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4781     UErrorCode errorCode=U_ZERO_ERROR;
4782     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4783     if(U_FAILURE(errorCode)) {
4784         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4785         return;
4786     }
4787     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4788     ucnv_close(cnv);
4789 }
4790 
4791 static void
TestLMBCS(void)4792 TestLMBCS(void) {
4793     /* LMBCS-1 string */
4794     static const uint8_t pszLMBCS[]={
4795         0x61,
4796         0x01, 0x29,
4797         0x81,
4798         0xA0,
4799         0x0F, 0x27,
4800         0x0F, 0x91,
4801         0x14, 0x0a, 0x74,
4802         0x14, 0xF6, 0x02,
4803         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4804         0x10, 0x88, 0xA0,
4805     };
4806 
4807     /* Unicode UChar32 equivalents */
4808     static const UChar32 pszUnicode32[]={
4809         /* code point */
4810         0x00000061,
4811         0x00002013,
4812         0x000000FC,
4813         0x000000E1,
4814         0x00000007,
4815         0x00000091,
4816         0x00000a74,
4817         0x00000200,
4818         0x00023456, /* code point for surrogate pair */
4819         0x00005516
4820     };
4821 
4822 /* Unicode UChar equivalents */
4823     static const UChar pszUnicode[]={
4824         /* code point */
4825         0x0061,
4826         0x2013,
4827         0x00FC,
4828         0x00E1,
4829         0x0007,
4830         0x0091,
4831         0x0a74,
4832         0x0200,
4833         0xD84D, /* low surrogate */
4834         0xDC56, /* high surrogate */
4835         0x5516
4836     };
4837 
4838 /* expected test results */
4839     static const int offsets32[]={
4840         /* number of bytes read, code point */
4841         0,
4842         1,
4843         3,
4844         4,
4845         5,
4846         7,
4847         9,
4848         12,
4849         15,
4850         21,
4851         24
4852     };
4853 
4854 /* expected test results */
4855     static const int offsets[]={
4856         /* number of bytes read, code point */
4857         0,
4858         1,
4859         3,
4860         4,
4861         5,
4862         7,
4863         9,
4864         12,
4865         15,
4866         18,
4867         21,
4868         24
4869     };
4870 
4871 
4872     UConverter *cnv;
4873 
4874 #define NAME_LMBCS_1 "LMBCS-1"
4875 #define NAME_LMBCS_2 "LMBCS-2"
4876 
4877 
4878    /* Some basic open/close/property tests on some LMBCS converters */
4879     {
4880 
4881       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4882       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4883       char get_subchars [1];
4884       const char * get_name;
4885       UConverter *cnv1;
4886       UConverter *cnv2;
4887 
4888       int8_t len = sizeof(get_subchars);
4889 
4890       UErrorCode errorCode=U_ZERO_ERROR;
4891 
4892       /* Open */
4893       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4894       if(U_FAILURE(errorCode)) {
4895          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4896          return;
4897       }
4898       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4899       if(U_FAILURE(errorCode)) {
4900          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4901          return;
4902       }
4903 
4904       /* Name */
4905       get_name = ucnv_getName (cnv1, &errorCode);
4906       if (strcmp(NAME_LMBCS_1,get_name)){
4907          log_err("Unexpected converter name: %s\n", get_name);
4908       }
4909       get_name = ucnv_getName (cnv2, &errorCode);
4910       if (strcmp(NAME_LMBCS_2,get_name)){
4911          log_err("Unexpected converter name: %s\n", get_name);
4912       }
4913 
4914       /* substitution chars */
4915       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4916       if(U_FAILURE(errorCode)) {
4917          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4918       }
4919       if (len!=1){
4920          log_err("Unexpected length of sub chars\n");
4921       }
4922       if (get_subchars[0] != expected_subchars[0]){
4923            log_err("Unexpected value of sub chars\n");
4924       }
4925       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4926       if(U_FAILURE(errorCode)) {
4927          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4928       }
4929       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4930       if(U_FAILURE(errorCode)) {
4931          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4932       }
4933       if (len!=1){
4934          log_err("Unexpected length of sub chars\n");
4935       }
4936       if (get_subchars[0] != new_subchars[0]){
4937            log_err("Unexpected value of sub chars\n");
4938       }
4939       ucnv_close(cnv1);
4940       ucnv_close(cnv2);
4941 
4942     }
4943 
4944     /* LMBCS to Unicode - offsets */
4945     {
4946        UErrorCode errorCode=U_ZERO_ERROR;
4947 
4948        const char * pSource = (const char *)pszLMBCS;
4949        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4950 
4951        UChar Out [sizeof(pszUnicode) + 1];
4952        UChar * pOut = Out;
4953        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4954 
4955        int32_t off [sizeof(offsets)];
4956 
4957       /* last 'offset' in expected results is just the final size.
4958          (Makes other tests easier). Compensate here: */
4959 
4960        off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4961 
4962 
4963 
4964       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4965       if(U_FAILURE(errorCode)) {
4966            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4967            return;
4968       }
4969 
4970 
4971 
4972       ucnv_toUnicode (cnv,
4973                       &pOut,
4974                       OutLimit,
4975                       &pSource,
4976                       sourceLimit,
4977                       off,
4978                       true,
4979                       &errorCode);
4980 
4981 
4982        if (memcmp(off,offsets,sizeof(offsets)))
4983        {
4984          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4985        }
4986        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4987        {
4988          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4989        }
4990        ucnv_close(cnv);
4991     }
4992     {
4993    /* LMBCS to Unicode - getNextUChar */
4994       const char * sourceStart;
4995       const char *source=(const char *)pszLMBCS;
4996       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4997       const UChar32 *results= pszUnicode32;
4998       const int *off = offsets32;
4999 
5000       UErrorCode errorCode=U_ZERO_ERROR;
5001       UChar32 uniChar;
5002 
5003       cnv=ucnv_open("LMBCS-1", &errorCode);
5004       if(U_FAILURE(errorCode)) {
5005            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5006            return;
5007       }
5008       else
5009       {
5010 
5011          while(source<limit) {
5012             sourceStart=source;
5013             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5014             if(U_FAILURE(errorCode)) {
5015                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5016                   break;
5017             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5018                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5019                    uniChar, (source-sourceStart), *results, *off);
5020                break;
5021             }
5022             results++;
5023             off++;
5024          }
5025        }
5026        ucnv_close(cnv);
5027     }
5028     { /* test locale & optimization group operations: Unicode to LMBCS */
5029 
5030       UErrorCode errorCode=U_ZERO_ERROR;
5031       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5032       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5033       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5034       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5035       const UChar * pUniOut = uniString;
5036       UChar * pUniIn = uniString;
5037       uint8_t lmbcsString [4];
5038       const char * pLMBCSOut = (const char *)lmbcsString;
5039       char * pLMBCSIn = (char *)lmbcsString;
5040 
5041       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5042       ucnv_fromUnicode (cnv16he,
5043                         &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5044                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5045                         NULL, 1, &errorCode);
5046 
5047       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5048       {
5049          log_err("LMBCS-16,locale=he gives unexpected translation\n");
5050       }
5051 
5052       pLMBCSIn= (char *)lmbcsString;
5053       pUniOut = uniString;
5054       ucnv_fromUnicode (cnv01us,
5055                         &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5056                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5057                         NULL, 1, &errorCode);
5058 
5059       if (lmbcsString[0] != 0x9F)
5060       {
5061          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5062       }
5063 
5064       /* single byte char from mbcs char set */
5065       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5066       pLMBCSOut = (const char *)lmbcsString;
5067       pUniIn = uniString;
5068       ucnv_toUnicode (cnv16jp,
5069                         &pUniIn, pUniIn + 1,
5070                         &pLMBCSOut, (pLMBCSOut + 1),
5071                         NULL, 1, &errorCode);
5072       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5073       {
5074            log_err("Unexpected results from LMBCS-16 single byte char\n");
5075       }
5076       /* convert to group 1: should be 3 bytes */
5077       pLMBCSIn = (char *)lmbcsString;
5078       pUniOut = uniString;
5079       ucnv_fromUnicode (cnv01us,
5080                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5081                         &pUniOut, pUniOut + 1,
5082                         NULL, 1, &errorCode);
5083       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5084          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5085       {
5086            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5087       }
5088       pLMBCSOut = (const char *)lmbcsString;
5089       pUniIn = uniString;
5090       ucnv_toUnicode (cnv01us,
5091                         &pUniIn, pUniIn + 1,
5092                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5093                         NULL, 1, &errorCode);
5094       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5095       {
5096            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5097       }
5098       pLMBCSIn = (char *)lmbcsString;
5099       pUniOut = uniString;
5100       ucnv_fromUnicode (cnv16jp,
5101                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5102                         &pUniOut, pUniOut + 1,
5103                         NULL, 1, &errorCode);
5104       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5105       {
5106            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5107       }
5108       ucnv_close(cnv16he);
5109       ucnv_close(cnv16jp);
5110       ucnv_close(cnv01us);
5111     }
5112     {
5113        /* Small source buffer testing, LMBCS -> Unicode */
5114 
5115        UErrorCode errorCode=U_ZERO_ERROR;
5116 
5117        const char * pSource = (const char *)pszLMBCS;
5118        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5119        int codepointCount = 0;
5120 
5121        UChar Out [sizeof(pszUnicode) + 1];
5122        UChar * pOut = Out;
5123        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5124 
5125 
5126        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5127        if(U_FAILURE(errorCode)) {
5128            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5129            return;
5130        }
5131 
5132 
5133        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5134        {
5135            ucnv_toUnicode (cnv,
5136                &pOut,
5137                OutLimit,
5138                &pSource,
5139                (pSource+1), /* claim that this is a 1- byte buffer */
5140                NULL,
5141                false,    /* false means there might be more chars in the next buffer */
5142                &errorCode);
5143 
5144            if (U_SUCCESS (errorCode))
5145            {
5146                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5147                {
5148                    /* we are on to the next code point: check value */
5149 
5150                    if (Out[0] != pszUnicode[codepointCount]){
5151                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5152                            Out[0], pszUnicode[codepointCount]);
5153                    }
5154 
5155                    pOut = Out; /* reset for accumulating next code point */
5156                    codepointCount++;
5157                }
5158            }
5159            else
5160            {
5161                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5162            }
5163        }
5164        {
5165          /* limits & surrogate error testing */
5166          char LIn [sizeof(pszLMBCS)];
5167          const char * pLIn = LIn;
5168 
5169          char LOut [sizeof(pszLMBCS)];
5170          char * pLOut = LOut;
5171 
5172          UChar UOut [sizeof(pszUnicode)];
5173          UChar * pUOut = UOut;
5174 
5175          UChar UIn [sizeof(pszUnicode)];
5176          const UChar * pUIn = UIn;
5177 
5178          int32_t off [sizeof(offsets)];
5179          UChar32 uniChar;
5180 
5181          errorCode=U_ZERO_ERROR;
5182 
5183          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5184          pUIn++;
5185          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, false, &errorCode);
5186          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5187          {
5188             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5189          }
5190          pUIn--;
5191 
5192          errorCode=U_ZERO_ERROR;
5193          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,false, &errorCode);
5194          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5195          {
5196             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5197          }
5198          errorCode=U_ZERO_ERROR;
5199 
5200          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5201          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5202          {
5203             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5204          }
5205          errorCode=U_ZERO_ERROR;
5206 
5207          /* 0 byte source request - no error, no pointer movement */
5208          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,false, &errorCode);
5209          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,false, &errorCode);
5210          if(U_FAILURE(errorCode)) {
5211             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5212          }
5213          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5214          {
5215               log_err("Unexpected pointer move in 0 byte source request \n");
5216          }
5217          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5218          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5219          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5220          {
5221             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5222          }
5223          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5224          {
5225             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5226          }
5227          errorCode = U_ZERO_ERROR;
5228 
5229          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5230 
5231          pUIn = pszUnicode;
5232          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,false, &errorCode);
5233          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5234          {
5235             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5236          }
5237 
5238          errorCode = U_ZERO_ERROR;
5239 
5240          pLIn = (const char *)pszLMBCS;
5241          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,false, &errorCode);
5242          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5243          {
5244             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5245          }
5246 
5247          /* unpaired or chopped LMBCS surrogates */
5248 
5249          /* OK high surrogate, Low surrogate is chopped */
5250          LIn [0] = (char)0x14;
5251          LIn [1] = (char)0xD8;
5252          LIn [2] = (char)0x01;
5253          LIn [3] = (char)0x14;
5254          LIn [4] = (char)0xDC;
5255          pLIn = LIn;
5256          errorCode = U_ZERO_ERROR;
5257          pUOut = UOut;
5258 
5259          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5260          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5261          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5262          {
5263             log_err("Unexpected results on chopped low surrogate\n");
5264          }
5265 
5266          /* chopped at surrogate boundary */
5267          LIn [0] = (char)0x14;
5268          LIn [1] = (char)0xD8;
5269          LIn [2] = (char)0x01;
5270          pLIn = LIn;
5271          errorCode = U_ZERO_ERROR;
5272          pUOut = UOut;
5273 
5274          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,true, &errorCode);
5275          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5276          {
5277             log_err("Unexpected results on chopped at surrogate boundary \n");
5278          }
5279 
5280          /* unpaired surrogate plus valid Unichar */
5281          LIn [0] = (char)0x14;
5282          LIn [1] = (char)0xD8;
5283          LIn [2] = (char)0x01;
5284          LIn [3] = (char)0x14;
5285          LIn [4] = (char)0xC9;
5286          LIn [5] = (char)0xD0;
5287          pLIn = LIn;
5288          errorCode = U_ZERO_ERROR;
5289          pUOut = UOut;
5290 
5291          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,true, &errorCode);
5292          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5293          {
5294             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5295          }
5296 
5297       /* unpaired surrogate plus chopped Unichar */
5298          LIn [0] = (char)0x14;
5299          LIn [1] = (char)0xD8;
5300          LIn [2] = (char)0x01;
5301          LIn [3] = (char)0x14;
5302          LIn [4] = (char)0xC9;
5303 
5304          pLIn = LIn;
5305          errorCode = U_ZERO_ERROR;
5306          pUOut = UOut;
5307 
5308          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5309          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5310          {
5311             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5312          }
5313 
5314          /* unpaired surrogate plus valid non-Unichar */
5315          LIn [0] = (char)0x14;
5316          LIn [1] = (char)0xD8;
5317          LIn [2] = (char)0x01;
5318          LIn [3] = (char)0x0F;
5319          LIn [4] = (char)0x3B;
5320 
5321          pLIn = LIn;
5322          errorCode = U_ZERO_ERROR;
5323          pUOut = UOut;
5324 
5325          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5326          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5327          {
5328             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5329          }
5330 
5331          /* unpaired surrogate plus chopped non-Unichar */
5332          LIn [0] = (char)0x14;
5333          LIn [1] = (char)0xD8;
5334          LIn [2] = (char)0x01;
5335          LIn [3] = (char)0x0F;
5336 
5337          pLIn = LIn;
5338          errorCode = U_ZERO_ERROR;
5339          pUOut = UOut;
5340 
5341          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,true, &errorCode);
5342 
5343          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5344          {
5345             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5346          }
5347        }
5348     }
5349    ucnv_close(cnv);  /* final cleanup */
5350 }
5351 
5352 
TestJitterbug255(void)5353 static void TestJitterbug255(void)
5354 {
5355     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5356     const char *testBuffer = (const char *)testBytes;
5357     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5358     UErrorCode status = U_ZERO_ERROR;
5359     /*UChar32 result;*/
5360     UConverter *cnv = 0;
5361 
5362     cnv = ucnv_open("shift-jis", &status);
5363     if (U_FAILURE(status) || cnv == 0) {
5364         log_data_err("Failed to open the converter for SJIS.\n");
5365                 return;
5366     }
5367     while (testBuffer != testEnd)
5368     {
5369         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5370         if (U_FAILURE(status))
5371         {
5372             log_err("Failed to convert the next UChar for SJIS.\n");
5373             break;
5374         }
5375     }
5376     ucnv_close(cnv);
5377 }
5378 
TestEBCDICUS4XML(void)5379 static void TestEBCDICUS4XML(void)
5380 {
5381     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5382     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5383     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5384     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5385     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5386     UChar *unicodes = unicodes_x;
5387     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5388     char *target = target_x;
5389     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5390     UErrorCode status = U_ZERO_ERROR;
5391     UConverter *cnv = 0;
5392 
5393     cnv = ucnv_open("ebcdic-xml-us", &status);
5394     if (U_FAILURE(status) || cnv == 0) {
5395         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5396         return;
5397     }
5398     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, true, &status);
5399     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5400         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5401             u_errorName(status));
5402         printUSeqErr(unicodes_x, 3);
5403         printUSeqErr(toUnicodeMaps, 3);
5404     }
5405     status = U_ZERO_ERROR;
5406     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, true, &status);
5407     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5408         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5409             u_errorName(status));
5410         printSeqErr((const unsigned char*)target_x, 3);
5411         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5412     }
5413     ucnv_close(cnv);
5414 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5416 
5417 #if !UCONFIG_NO_COLLATION
5418 
TestJitterbug981(void)5419 static void TestJitterbug981(void){
5420     const UChar* rules;
5421     int32_t rules_length, target_cap, bytes_needed, buff_size;
5422     UErrorCode status = U_ZERO_ERROR;
5423     UConverter *utf8cnv;
5424     UCollator* myCollator;
5425     char *buff;
5426     int numNeeded=0;
5427     utf8cnv = ucnv_open ("utf8", &status);
5428     if(U_FAILURE(status)){
5429         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5430         return;
5431     }
5432     myCollator = ucol_open("zh", &status);
5433     if(U_FAILURE(status)){
5434         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5435         ucnv_close(utf8cnv);
5436         return;
5437     }
5438 
5439     rules = ucol_getRules(myCollator, &rules_length);
5440     if(rules_length == 0) {
5441         log_data_err("missing zh tailoring rule string\n");
5442         ucol_close(myCollator);
5443         ucnv_close(utf8cnv);
5444         return;
5445     }
5446     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5447     buff = malloc(buff_size);
5448 
5449     target_cap = 0;
5450     do {
5451         ucnv_reset(utf8cnv);
5452         status = U_ZERO_ERROR;
5453         if(target_cap >= buff_size) {
5454             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5455             break;
5456         }
5457         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5458             rules, rules_length, &status);
5459         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5460         if(numNeeded!=0 && numNeeded!= bytes_needed){
5461             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5462             break;
5463         }
5464         numNeeded = bytes_needed;
5465     } while (status == U_BUFFER_OVERFLOW_ERROR);
5466     ucol_close(myCollator);
5467     ucnv_close(utf8cnv);
5468     free(buff);
5469 }
5470 
5471 #endif
5472 
5473 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293(void)5474 static void TestJitterbug1293(void){
5475     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5476     char target[256];
5477     UErrorCode status = U_ZERO_ERROR;
5478     UConverter* conv=NULL;
5479     int32_t target_cap, bytes_needed, numNeeded = 0;
5480     conv = ucnv_open("shift-jis",&status);
5481     if(U_FAILURE(status)){
5482       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5483       return;
5484     }
5485 
5486     do{
5487         target_cap =0;
5488         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5489         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5490         if(numNeeded!=0 && numNeeded!= bytes_needed){
5491           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5492         }
5493         numNeeded = bytes_needed;
5494     } while (status == U_BUFFER_OVERFLOW_ERROR);
5495     if(U_FAILURE(status)){
5496       log_err("An error occurred in ucnv_fromUChars. Error: %s", u_errorName(status));
5497       return;
5498     }
5499     ucnv_close(conv);
5500 }
5501 #endif
5502 
TestJB5275_1(void)5503 static void TestJB5275_1(void){
5504 
5505     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5506                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5507                                 /* Switch script: */
5508                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5509                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5510                                 "\xEF\x40\x3B\xB3\x0A";
5511     static const UChar expected[] ={
5512             0x003b, 0x0a15, 0x000a, /* Easy characters */
5513             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5514             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5515             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5516             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5517     };
5518 
5519     UErrorCode status = U_ZERO_ERROR;
5520     UConverter* conv = ucnv_open("iscii-gur", &status);
5521     UChar dest[100] = {'\0'};
5522     UChar* target = dest;
5523     UChar* targetLimit = dest+100;
5524     const char* source = data;
5525     const char* sourceLimit = data+strlen(data);
5526     const UChar* exp = expected;
5527 
5528     if (U_FAILURE(status)) {
5529         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5530         return;
5531     }
5532 
5533     log_verbose("Testing switching back to default script when new line is encountered.\n");
5534     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5535     if(U_FAILURE(status)){
5536         log_err("conversion failed: %s \n", u_errorName(status));
5537     }
5538     targetLimit = target;
5539     target = dest;
5540     printUSeq(target, (int)(targetLimit-target));
5541     while(target<targetLimit){
5542         if(*exp!=*target){
5543             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5544         }
5545         target++;
5546         exp++;
5547     }
5548     ucnv_close(conv);
5549 }
5550 
TestJB5275(void)5551 static void TestJB5275(void){
5552     static const char* data =
5553     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5554     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5555     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5556         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5557         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5558         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5559         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5560         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5561         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5562         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5563     static const UChar expected[] ={
5564         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5565         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5566         0x0038, 0x0C95, 0x000A, /* Kannada test */
5567         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5568         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5569         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5570     };
5571 
5572     UErrorCode status = U_ZERO_ERROR;
5573     UConverter* conv = ucnv_open("iscii", &status);
5574     UChar dest[100] = {'\0'};
5575     UChar* target = dest;
5576     UChar* targetLimit = dest+100;
5577     const char* source = data;
5578     const char* sourceLimit = data+strlen(data);
5579     const UChar* exp = expected;
5580     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5581     if(U_FAILURE(status)){
5582         log_data_err("conversion failed: %s \n", u_errorName(status));
5583     }
5584     targetLimit = target;
5585     target = dest;
5586 
5587     printUSeq(target, (int)(targetLimit-target));
5588 
5589     while(target<targetLimit){
5590         if(*exp!=*target){
5591             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5592         }
5593         target++;
5594         exp++;
5595     }
5596     ucnv_close(conv);
5597 }
5598 
5599 static void
TestIsFixedWidth(void)5600 TestIsFixedWidth(void) {
5601     UErrorCode status = U_ZERO_ERROR;
5602     UConverter *cnv = NULL;
5603     int32_t i;
5604 
5605     const char *fixedWidth[] = {
5606             "US-ASCII",
5607             "UTF32",
5608             "ibm-5478_P100-1995"
5609     };
5610 
5611     const char *notFixedWidth[] = {
5612             "GB18030",
5613             "UTF8",
5614             "windows-949-2000",
5615             "UTF16"
5616     };
5617 
5618     for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5619         cnv = ucnv_open(fixedWidth[i], &status);
5620         if (cnv == NULL || U_FAILURE(status)) {
5621             log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5622             continue;
5623         }
5624 
5625         if (!ucnv_isFixedWidth(cnv, &status)) {
5626             log_err("%s is a fixedWidth converter but returned false.\n", fixedWidth[i]);
5627         }
5628         ucnv_close(cnv);
5629     }
5630 
5631     for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5632         cnv = ucnv_open(notFixedWidth[i], &status);
5633         if (cnv == NULL || U_FAILURE(status)) {
5634             log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5635             continue;
5636         }
5637 
5638         if (ucnv_isFixedWidth(cnv, &status)) {
5639             log_err("%s is NOT a fixedWidth converter but returned true.\n", notFixedWidth[i]);
5640         }
5641         ucnv_close(cnv);
5642     }
5643 }
5644