• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // © 2016 and later: Unicode, Inc. and others.
2  // License & terms of use: http://www.unicode.org/copyright.html
3  /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  convtest.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003jul15
16  *   created by: Markus W. Scherer
17  *
18  *   Test file for data-driven conversion tests.
19  */
20  
21  #include "unicode/utypes.h"
22  
23  #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it also removes tests for Unicode charsets
 * like UTF-8 that should still work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * test cases for legacy charsets.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
33  
34  #include "unicode/ucnv.h"
35  #include "unicode/unistr.h"
36  #include "unicode/parsepos.h"
37  #include "unicode/uniset.h"
38  #include "unicode/ustring.h"
39  #include "unicode/ures.h"
40  #include "unicode/utf16.h"
41  #include "convtest.h"
42  #include "cmemory.h"
43  #include "unicode/tstdtmod.h"
44  #include <string.h>
45  #include <stdlib.h>
46  
// Single-character codes that the test data uses to name a converter callback.
// TestToUnicode() and TestFromUnicode() switch on the first character of the
// "callback" string of a test case to select the matching ICU callback.
enum {
    SUB_CB='?',   // selects the ..._CALLBACK_SUBSTITUTE function
    SKIP_CB='0',  // selects the ..._CALLBACK_SKIP function
    STOP_CB='.',  // selects the ..._CALLBACK_STOP function
    ESC_CB='&'    // selects the ..._CALLBACK_ESCAPE function
};
54  
ConversionTest()55  ConversionTest::ConversionTest() {
56      UErrorCode errorCode=U_ZERO_ERROR;
57      utf8Cnv=ucnv_open("UTF-8", &errorCode);
58      ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
59      if(U_FAILURE(errorCode)) {
60          errln("unable to open UTF-8 converter");
61      }
62  }
63  
~ConversionTest()64  ConversionTest::~ConversionTest() {
65      ucnv_close(utf8Cnv);
66  }
67  
68  void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)69  ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
70      if (exec) logln("TestSuite ConversionTest: ");
71      TESTCASE_AUTO_BEGIN;
72  #if !UCONFIG_NO_FILE_IO
73      TESTCASE_AUTO(TestToUnicode);
74      TESTCASE_AUTO(TestFromUnicode);
75      TESTCASE_AUTO(TestGetUnicodeSet);
76  #endif
77      TESTCASE_AUTO(TestGetUnicodeSet2);
78      TESTCASE_AUTO(TestDefaultIgnorableCallback);
79      TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
80      TESTCASE_AUTO_END;
81  }
82  
83  // test data interface ----------------------------------------------------- ***
84  
85  void
TestToUnicode()86  ConversionTest::TestToUnicode() {
87      ConversionCase cc;
88      char charset[100], cbopt[4];
89      const char *option;
90      UnicodeString s, unicode;
91      int32_t offsetsLength;
92      UConverterToUCallback callback;
93  
94      TestDataModule *dataModule;
95      TestData *testData;
96      const DataMap *testCase;
97      UErrorCode errorCode;
98      int32_t i;
99  
100      errorCode=U_ZERO_ERROR;
101      dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
102      if(U_SUCCESS(errorCode)) {
103          testData=dataModule->createTestData("toUnicode", errorCode);
104          if(U_SUCCESS(errorCode)) {
105              for(i=0; testData->nextCase(testCase, errorCode); ++i) {
106                  if(U_FAILURE(errorCode)) {
107                      errln("error retrieving conversion/toUnicode test case %d - %s",
108                              i, u_errorName(errorCode));
109                      errorCode=U_ZERO_ERROR;
110                      continue;
111                  }
112  
113                  cc.caseNr=i;
114  
115                  s=testCase->getString("charset", errorCode);
116                  s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
117                  cc.charset=charset;
118  
119                  // BEGIN android-added
120                  // To save space, Android does not build full ISO-2022-CN tables.
121                  // We skip the TestGetKeywordValuesForLocale for counting available collations.
122                  if (strlen(charset) >= 8 &&
123                      strncmp(charset+4, "2022-CN", 4) == 0) {
124                      continue;
125                  }
126                  // END android-added
127  
128                  cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
129                  unicode=testCase->getString("unicode", errorCode);
130                  cc.unicode=unicode.getBuffer();
131                  cc.unicodeLength=unicode.length();
132  
133                  offsetsLength=0;
134                  cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
135                  if(offsetsLength==0) {
136                      cc.offsets=NULL;
137                  } else if(offsetsLength!=unicode.length()) {
138                      errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
139                              i, unicode.length(), offsetsLength);
140                      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
141                  }
142  
143                  cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
144                  cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
145  
146                  s=testCase->getString("errorCode", errorCode);
147                  if(s==UNICODE_STRING("invalid", 7)) {
148                      cc.outErrorCode=U_INVALID_CHAR_FOUND;
149                  } else if(s==UNICODE_STRING("illegal", 7)) {
150                      cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
151                  } else if(s==UNICODE_STRING("truncated", 9)) {
152                      cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
153                  } else if(s==UNICODE_STRING("illesc", 6)) {
154                      cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
155                  } else if(s==UNICODE_STRING("unsuppesc", 9)) {
156                      cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
157                  } else {
158                      cc.outErrorCode=U_ZERO_ERROR;
159                  }
160  
161                  s=testCase->getString("callback", errorCode);
162                  s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
163                  cc.cbopt=cbopt;
164                  switch(cbopt[0]) {
165                  case SUB_CB:
166                      callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
167                      break;
168                  case SKIP_CB:
169                      callback=UCNV_TO_U_CALLBACK_SKIP;
170                      break;
171                  case STOP_CB:
172                      callback=UCNV_TO_U_CALLBACK_STOP;
173                      break;
174                  case ESC_CB:
175                      callback=UCNV_TO_U_CALLBACK_ESCAPE;
176                      break;
177                  default:
178                      callback=NULL;
179                      break;
180                  }
181                  option=callback==NULL ? cbopt : cbopt+1;
182                  if(*option==0) {
183                      option=NULL;
184                  }
185  
186                  cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
187  
188                  if(U_FAILURE(errorCode)) {
189                      errln("error parsing conversion/toUnicode test case %d - %s",
190                              i, u_errorName(errorCode));
191                      errorCode=U_ZERO_ERROR;
192                  } else {
193                      logln("TestToUnicode[%d] %s", i, charset);
194                      ToUnicodeCase(cc, callback, option);
195                  }
196              }
197              delete testData;
198          }
199          delete dataModule;
200      }
201      else {
202          dataerrln("Could not load test conversion data");
203      }
204  }
205  
206  void
TestFromUnicode()207  ConversionTest::TestFromUnicode() {
208      ConversionCase cc;
209      char charset[100], cbopt[4];
210      const char *option;
211      UnicodeString s, unicode, invalidUChars;
212      int32_t offsetsLength, index;
213      UConverterFromUCallback callback;
214  
215      TestDataModule *dataModule;
216      TestData *testData;
217      const DataMap *testCase;
218      const UChar *p;
219      UErrorCode errorCode;
220      int32_t i, length;
221  
222      errorCode=U_ZERO_ERROR;
223      dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
224      if(U_SUCCESS(errorCode)) {
225          testData=dataModule->createTestData("fromUnicode", errorCode);
226          if(U_SUCCESS(errorCode)) {
227              for(i=0; testData->nextCase(testCase, errorCode); ++i) {
228                  if(U_FAILURE(errorCode)) {
229                      errln("error retrieving conversion/fromUnicode test case %d - %s",
230                              i, u_errorName(errorCode));
231                      errorCode=U_ZERO_ERROR;
232                      continue;
233                  }
234  
235                  cc.caseNr=i;
236  
237                  s=testCase->getString("charset", errorCode);
238                  s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
239                  cc.charset=charset;
240  
241                  // BEGIN android-added
242                  // To save space, Android does not build full ISO-2022-CN tables.
243                  // We skip the TestGetKeywordValuesForLocale for counting available collations.
244                  if (strlen(charset) >= 8 &&
245                      strncmp(charset+4, "2022-CN", 4) == 0) {
246                      continue;
247                  }
248                  // END android-added
249  
250                  unicode=testCase->getString("unicode", errorCode);
251                  cc.unicode=unicode.getBuffer();
252                  cc.unicodeLength=unicode.length();
253                  cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
254  
255                  offsetsLength=0;
256                  cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
257                  if(offsetsLength==0) {
258                      cc.offsets=NULL;
259                  } else if(offsetsLength!=cc.bytesLength) {
260                      errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
261                              i, cc.bytesLength, offsetsLength);
262                      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
263                  }
264  
265                  cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
266                  cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
267  
268                  s=testCase->getString("errorCode", errorCode);
269                  if(s==UNICODE_STRING("invalid", 7)) {
270                      cc.outErrorCode=U_INVALID_CHAR_FOUND;
271                  } else if(s==UNICODE_STRING("illegal", 7)) {
272                      cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
273                  } else if(s==UNICODE_STRING("truncated", 9)) {
274                      cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
275                  } else {
276                      cc.outErrorCode=U_ZERO_ERROR;
277                  }
278  
279                  s=testCase->getString("callback", errorCode);
280                  cc.setSub=0; // default: no subchar
281  
282                  if((index=s.indexOf((UChar)0))>0) {
283                      // read NUL-separated subchar first, if any
284                      // copy the subchar from Latin-1 characters
285                      // start after the NUL
286                      p=s.getTerminatedBuffer();
287                      length=index+1;
288                      p+=length;
289                      length=s.length()-length;
290                      if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
291                          errorCode=U_ILLEGAL_ARGUMENT_ERROR;
292                      } else {
293                          int32_t j;
294  
295                          for(j=0; j<length; ++j) {
296                              cc.subchar[j]=(char)p[j];
297                          }
298                          // NUL-terminate the subchar
299                          cc.subchar[j]=0;
300                          cc.setSub=1;
301                      }
302  
303                      // remove the NUL and subchar from s
304                      s.truncate(index);
305                  } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
306                      // read a substitution string, separated by an equal sign
307                      p=s.getBuffer()+index+1;
308                      length=s.length()-(index+1);
309                      if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
310                          errorCode=U_ILLEGAL_ARGUMENT_ERROR;
311                      } else {
312                          u_memcpy(cc.subString, p, length);
313                          // NUL-terminate the subString
314                          cc.subString[length]=0;
315                          cc.setSub=-1;
316                      }
317  
318                      // remove the equal sign and subString from s
319                      s.truncate(index);
320                  }
321  
322                  s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
323                  cc.cbopt=cbopt;
324                  switch(cbopt[0]) {
325                  case SUB_CB:
326                      callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
327                      break;
328                  case SKIP_CB:
329                      callback=UCNV_FROM_U_CALLBACK_SKIP;
330                      break;
331                  case STOP_CB:
332                      callback=UCNV_FROM_U_CALLBACK_STOP;
333                      break;
334                  case ESC_CB:
335                      callback=UCNV_FROM_U_CALLBACK_ESCAPE;
336                      break;
337                  default:
338                      callback=NULL;
339                      break;
340                  }
341                  option=callback==NULL ? cbopt : cbopt+1;
342                  if(*option==0) {
343                      option=NULL;
344                  }
345  
346                  invalidUChars=testCase->getString("invalidUChars", errorCode);
347                  cc.invalidUChars=invalidUChars.getBuffer();
348                  cc.invalidLength=invalidUChars.length();
349  
350                  if(U_FAILURE(errorCode)) {
351                      errln("error parsing conversion/fromUnicode test case %d - %s",
352                              i, u_errorName(errorCode));
353                      errorCode=U_ZERO_ERROR;
354                  } else {
355                      logln("TestFromUnicode[%d] %s", i, charset);
356                      FromUnicodeCase(cc, callback, option);
357                  }
358              }
359              delete testData;
360          }
361          delete dataModule;
362      }
363      else {
364          dataerrln("Could not load test conversion data");
365      }
366  }
367  
368  static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
369  
370  void
TestGetUnicodeSet()371  ConversionTest::TestGetUnicodeSet() {
372      char charset[100];
373      UnicodeString s, map, mapnot;
374      int32_t which;
375  
376      ParsePosition pos;
377      UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
378      UnicodeSet *cnvSetPtr = &cnvSet;
379      LocalUConverterPointer cnv;
380  
381      TestDataModule *dataModule;
382      TestData *testData;
383      const DataMap *testCase;
384      UErrorCode errorCode;
385      int32_t i;
386  
387      errorCode=U_ZERO_ERROR;
388      dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
389      if(U_SUCCESS(errorCode)) {
390          testData=dataModule->createTestData("getUnicodeSet", errorCode);
391          if(U_SUCCESS(errorCode)) {
392              for(i=0; testData->nextCase(testCase, errorCode); ++i) {
393                  if(U_FAILURE(errorCode)) {
394                      errln("error retrieving conversion/getUnicodeSet test case %d - %s",
395                              i, u_errorName(errorCode));
396                      errorCode=U_ZERO_ERROR;
397                      continue;
398                  }
399  
400                  s=testCase->getString("charset", errorCode);
401                  s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
402  
403                  // BEGIN android-added
404                  // To save space, Android does not build full ISO-2022-CN tables.
405                  // We skip the TestGetKeywordValuesForLocale for counting available collations.
406                  if (strlen(charset) >= 8 &&
407                      strncmp(charset+4, "2022-CN", 4) == 0) {
408                      continue;
409                  }
410                  // END android-added
411  
412                  map=testCase->getString("map", errorCode);
413                  mapnot=testCase->getString("mapnot", errorCode);
414  
415                  which=testCase->getInt28("which", errorCode);
416  
417                  if(U_FAILURE(errorCode)) {
418                      errln("error parsing conversion/getUnicodeSet test case %d - %s",
419                              i, u_errorName(errorCode));
420                      errorCode=U_ZERO_ERROR;
421                      continue;
422                  }
423  
424                  // test this test case
425                  mapSet.clear();
426                  mapnotSet.clear();
427  
428                  pos.setIndex(0);
429                  mapSet.applyPattern(map, pos, 0, NULL, errorCode);
430                  if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
431                      errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
432                            "    error index %d  index %d  U+%04x",
433                              i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
434                      errorCode=U_ZERO_ERROR;
435                      continue;
436                  }
437  
438                  pos.setIndex(0);
439                  mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
440                  if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
441                      errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
442                            "    error index %d  index %d  U+%04x",
443                              i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
444                      errorCode=U_ZERO_ERROR;
445                      continue;
446                  }
447  
448                  logln("TestGetUnicodeSet[%d] %s", i, charset);
449  
450                  cnv.adoptInstead(cnv_open(charset, errorCode));
451                  if(U_FAILURE(errorCode)) {
452                      errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
453                              charset, i, u_errorName(errorCode));
454                      errorCode=U_ZERO_ERROR;
455                      continue;
456                  }
457  
458                  ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
459  
460                  if(U_FAILURE(errorCode)) {
461                      errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
462                              charset, i, u_errorName(errorCode));
463                      errorCode=U_ZERO_ERROR;
464                      continue;
465                  }
466  
467                  // are there items that must be in cnvSet but are not?
468                  (diffSet=mapSet).removeAll(cnvSet);
469                  if(!diffSet.isEmpty()) {
470                      diffSet.toPattern(s, TRUE);
471                      if(s.length()>100) {
472                          s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
473                      }
474                      errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
475                              charset, i);
476                      errln(s);
477                  }
478  
479                  // are there items that must not be in cnvSet but are?
480                  (diffSet=mapnotSet).retainAll(cnvSet);
481                  if(!diffSet.isEmpty()) {
482                      diffSet.toPattern(s, TRUE);
483                      if(s.length()>100) {
484                          s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
485                      }
486                      errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
487                              charset, i);
488                      errln(s);
489                  }
490              }
491              delete testData;
492          }
493          delete dataModule;
494      }
495      else {
496          dataerrln("Could not load test conversion data");
497      }
498  }
499  
500  U_CDECL_BEGIN
501  static void U_CALLCONV
getUnicodeSetCallback(const void * context,UConverterFromUnicodeArgs *,const UChar *,int32_t,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)502  getUnicodeSetCallback(const void *context,
503                        UConverterFromUnicodeArgs * /*fromUArgs*/,
504                        const UChar* /*codeUnits*/,
505                        int32_t /*length*/,
506                        UChar32 codePoint,
507                        UConverterCallbackReason reason,
508                        UErrorCode *pErrorCode) {
509      if(reason<=UCNV_IRREGULAR) {
510          ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
511          *pErrorCode=U_ZERO_ERROR;                    // skip
512      }  // else ignore the reset, close and clone calls.
513  }
514  U_CDECL_END
515  
516  // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
517  void
TestGetUnicodeSet2()518  ConversionTest::TestGetUnicodeSet2() {
519      // Build a string with all code points.
520      UChar32 cpLimit;
521      int32_t s0Length;
522      if(quick) {
523          cpLimit=s0Length=0x10000;  // BMP only
524      } else {
525          cpLimit=0x110000;
526          s0Length=0x10000+0x200000;  // BMP + surrogate pairs
527      }
528      UChar *s0=new UChar[s0Length];
529      if(s0==NULL) {
530          return;
531      }
532      UChar *s=s0;
533      UChar32 c;
534      UChar c2;
535      // low BMP
536      for(c=0; c<=0xd7ff; ++c) {
537          *s++=(UChar)c;
538      }
539      // trail surrogates
540      for(c=0xdc00; c<=0xdfff; ++c) {
541          *s++=(UChar)c;
542      }
543      // lead surrogates
544      // (after trails so that there is not even one surrogate pair in between)
545      for(c=0xd800; c<=0xdbff; ++c) {
546          *s++=(UChar)c;
547      }
548      // high BMP
549      for(c=0xe000; c<=0xffff; ++c) {
550          *s++=(UChar)c;
551      }
552      // supplementary code points = surrogate pairs
553      if(cpLimit==0x110000) {
554          for(c=0xd800; c<=0xdbff; ++c) {
555              for(c2=0xdc00; c2<=0xdfff; ++c2) {
556                  *s++=(UChar)c;
557                  *s++=c2;
558              }
559          }
560      }
561  
562      static const char *const cnvNames[]={
563          "UTF-8",
564          "UTF-7",
565          "UTF-16",
566          "US-ASCII",
567          "ISO-8859-1",
568          "windows-1252",
569          "Shift-JIS",
570          "ibm-1390",  // EBCDIC_STATEFUL table
571          "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
572          "HZ",
573          "ISO-2022-JP",
574          "JIS7",
575          "ISO-2022-CN",
576          "ISO-2022-CN-EXT",
577          "LMBCS"
578      };
579      LocalUConverterPointer cnv;
580      char buffer[1024];
581      int32_t i;
582      for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
583          UErrorCode errorCode=U_ZERO_ERROR;
584          cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
585          if(U_FAILURE(errorCode)) {
586              errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
587              continue;
588          }
589          UnicodeSet expected;
590          ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
591          if(U_FAILURE(errorCode)) {
592              errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
593              continue;
594          }
595          UConverterUnicodeSet which;
596          for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
597              if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
598                  ucnv_setFallback(cnv.getAlias(), TRUE);
599              }
600              expected.add(0, cpLimit-1);
601              s=s0;
602              UBool flush;
603              do {
604                  char *t=buffer;
605                  flush=(UBool)(s==s0+s0Length);
606                  ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
607                  if(U_FAILURE(errorCode)) {
608                      if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
609                          errorCode=U_ZERO_ERROR;
610                          continue;
611                      } else {
612                          break;  // unexpected error, should not occur
613                      }
614                  }
615              } while(!flush);
616              UnicodeSet set;
617              ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
618              if(cpLimit<0x110000) {
619                  set.remove(cpLimit, 0x10ffff);
620              }
621              if(which==UCNV_ROUNDTRIP_SET) {
622                  // ignore PUA code points because they will be converted even if they
623                  // are fallbacks and when other fallbacks are turned off,
624                  // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
625                  expected.remove(0xe000, 0xf8ff);
626                  expected.remove(0xf0000, 0xffffd);
627                  expected.remove(0x100000, 0x10fffd);
628                  set.remove(0xe000, 0xf8ff);
629                  set.remove(0xf0000, 0xffffd);
630                  set.remove(0x100000, 0x10fffd);
631              }
632              if(set!=expected) {
633                  // First try to see if we have different sets because ucnv_getUnicodeSet()
634                  // added strings: The above conversion method does not tell us what strings might be convertible.
635                  // Remove strings from the set and compare again.
636                  set.removeAllStrings();
637              }
638              if(set!=expected) {
639                  UnicodeSet diffSet;
640                  UnicodeString out;
641  
642                  // are there items that must be in the set but are not?
643                  (diffSet=expected).removeAll(set);
644                  if(!diffSet.isEmpty()) {
645                      diffSet.toPattern(out, TRUE);
646                      if(out.length()>100) {
647                          out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
648                      }
649                      errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
650                              cnvNames[i], which);
651                      errln(out);
652                  }
653  
654                  // are there items that must not be in the set but are?
655                  (diffSet=set).removeAll(expected);
656                  if(!diffSet.isEmpty()) {
657                      diffSet.toPattern(out, TRUE);
658                      if(out.length()>100) {
659                          out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
660                      }
661                      errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
662                              cnvNames[i], which);
663                      errln(out);
664                  }
665              }
666          }
667      }
668  
669      delete [] s0;
670  }
671  
672  // Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping
673  // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
674  void
TestDefaultIgnorableCallback()675  ConversionTest::TestDefaultIgnorableCallback() {
676      UErrorCode status = U_ZERO_ERROR;
677      const char *cnv_name = "euc-jp-2007";
678      const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
679      const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
680  
681      LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status));
682      if (U_FAILURE(status)) {
683          dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
684          return;
685      }
686  
687      LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status));
688      if (U_FAILURE(status)) {
689          dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
690          return;
691      }
692  
693      LocalUConverterPointer cnv(cnv_open(cnv_name, status));
694      if (U_FAILURE(status)) {
695          dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
696          return;
697      }
698  
699      // set callback for the converter
700      ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
701  
702      UChar32 input[1];
703      char output[10];
704      int32_t outputLength;
705  
706      // test default ignorables are ignored
707      int size = set_ignorable->size();
708      for (int i = 0; i < size; i++) {
709          status = U_ZERO_ERROR;
710          outputLength= 0;
711  
712          input[0] = set_ignorable->charAt(i);
713  
714          outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
715          if (U_FAILURE(status) || outputLength != 0) {
716              errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
717          }
718      }
719  
720      // test non-ignorables are not ignored
721      size = set_not_ignorable->size();
722      for (int i = 0; i < size; i++) {
723          status = U_ZERO_ERROR;
724          outputLength= 0;
725  
726          input[0] = set_not_ignorable->charAt(i);
727  
728          if (input[0] == 0) {
729              continue;
730          }
731  
732          outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
733          if (U_FAILURE(status) || outputLength <= 0) {
734              errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
735          }
736      }
737  }
738  
739  void
TestUTF8ToUTF8Overflow()740  ConversionTest::TestUTF8ToUTF8Overflow() {
741      IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
742      LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
743      LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
744      static const char *text = "aä";  // ä: 2 bytes
745      const char *source = text;
746      const char *sourceLimit = text + strlen(text);
747      char result[20];
748      char *target = result;
749      const char *targetLimit = result + sizeof(result);
750      UChar buffer16[20];
751      UChar *pivotSource = buffer16;
752      UChar *pivotTarget = buffer16;
753      const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
754      int32_t length;
755  
756      // Convert with insufficient target capacity.
757      result[2] = 5;
758      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
759                     &target, result + 2, &source, sourceLimit,
760                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
761                     FALSE, FALSE, errorCode);
762      assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
763      length = (int32_t)(target - result);
764      assertEquals("number of bytes written", 2, length);
765      assertEquals("next byte not clobbered", 5, result[2]);
766  
767      // Convert the rest and flush.
768      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
769                     &target, targetLimit, &source, sourceLimit,
770                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
771                     FALSE, TRUE, errorCode);
772  
773      assertSuccess("UTF-8->UTF-8", errorCode);
774      length = (int32_t)(target - result);
775      assertEquals("3 bytes", 3, length);
776      if (length == 3) {
777          assertTrue("result same as input", memcmp(text, result, length) == 0);
778      }
779  
780      ucnv_reset(cnv1.getAlias());
781      ucnv_reset(cnv2.getAlias());
782      memset(result, 0, sizeof(result));
783      static const char *text2 = "a��";  // U+1F6B2 bicycle: 4 bytes
784      source = text2;
785      sourceLimit = text2 + strlen(text2);
786      target = result;
787      pivotSource = pivotTarget = buffer16;
788  
789      // Convert with insufficient target capacity.
790      result[3] = 5;
791      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
792                     &target, result + 3, &source, sourceLimit,
793                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
794                     FALSE, FALSE, errorCode);
795      assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
796      length = (int32_t)(target - result);
797      assertEquals("text2 number of bytes written", 3, length);
798      assertEquals("text2 next byte not clobbered", 5, result[3]);
799  
800      // Convert the rest and flush.
801      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
802                     &target, targetLimit, &source, sourceLimit,
803                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
804                     FALSE, TRUE, errorCode);
805  
806      assertSuccess("text2 UTF-8->UTF-8", errorCode);
807      length = (int32_t)(target - result);
808      assertEquals("text2 5 bytes", 5, length);
809      if (length == 5) {
810          assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
811      }
812  
813      ucnv_reset(cnv1.getAlias());
814      ucnv_reset(cnv2.getAlias());
815      memset(result, 0, sizeof(result));
816      static const char *illFormed = "\xf1\x91\x93\x96\x91\x94";  // U+514D6 + two more trail bytes
817      source = illFormed;
818      sourceLimit = illFormed + strlen(illFormed);
819      target = result;
820      pivotSource = pivotTarget = buffer16;
821  
822      ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
823  
824      // Convert only two bytes and flush (but expect failure).
825      char errorBytes[10];
826      int8_t errorLength;
827      result[0] = 5;
828      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
829                     &target, targetLimit, &source, source + 2,
830                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
831                     FALSE, TRUE, errorCode);
832      assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
833      length = (int32_t)(target - result);
834      assertEquals("illFormed number of bytes written", 0, length);
835      errorLength = UPRV_LENGTHOF(errorBytes);
836      ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
837      assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
838      if (errorLength == 2) {
839          assertEquals("illFormed truncated errorBytes", 0xf191,
840                       ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
841      }
842  
843      // Continue conversion starting with a trail byte.
844      ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
845                     &target, targetLimit, &source, sourceLimit,
846                     buffer16, &pivotSource, &pivotTarget, pivotLimit,
847                     FALSE, TRUE, errorCode);
848  
849      assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
850      length = (int32_t)(target - result);
851      assertEquals("illFormed trail byte number of bytes written", 0, length);
852      errorLength = UPRV_LENGTHOF(errorBytes);
853      ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
854      assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
855      if (errorLength == 1) {
856          assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
857      }
858  }
859  
860  // open testdata or ICU data converter ------------------------------------- ***
861  
862  UConverter *
cnv_open(const char * name,UErrorCode & errorCode)863  ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
864      if(name!=NULL && *name=='+') {
865          // Converter names that start with '+' are ignored in ICU4J tests.
866          ++name;
867      }
868      if(name!=NULL && *name=='*') {
869          /* loadTestData(): set the data directory */
870          return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
871      } else {
872          return ucnv_open(name, &errorCode);
873      }
874  }
875  
876  // output helpers ---------------------------------------------------------- ***
877  
/**
 * Maps a value 0..9 to '0'..'9' and a value 10..15 to 'a'..'f'.
 */
static inline char
hexDigit(uint8_t digit) {
    if(digit>9) {
        return (char)('a'+(digit-10));
    }
    return (char)('0'+digit);
}
882  
883  static char *
printBytes(const uint8_t * bytes,int32_t length,char * out)884  printBytes(const uint8_t *bytes, int32_t length, char *out) {
885      uint8_t b;
886  
887      if(length>0) {
888          b=*bytes++;
889          --length;
890          *out++=hexDigit((uint8_t)(b>>4));
891          *out++=hexDigit((uint8_t)(b&0xf));
892      }
893  
894      while(length>0) {
895          b=*bytes++;
896          --length;
897          *out++=' ';
898          *out++=hexDigit((uint8_t)(b>>4));
899          *out++=hexDigit((uint8_t)(b&0xf));
900      }
901      *out++=0;
902      return out;
903  }
904  
905  static char *
printUnicode(const UChar * unicode,int32_t length,char * out)906  printUnicode(const UChar *unicode, int32_t length, char *out) {
907      UChar32 c;
908      int32_t i;
909  
910      for(i=0; i<length;) {
911          if(i>0) {
912              *out++=' ';
913          }
914          U16_NEXT(unicode, i, length, c);
915          // write 4..6 digits
916          if(c>=0x100000) {
917              *out++='1';
918          }
919          if(c>=0x10000) {
920              *out++=hexDigit((uint8_t)((c>>16)&0xf));
921          }
922          *out++=hexDigit((uint8_t)((c>>12)&0xf));
923          *out++=hexDigit((uint8_t)((c>>8)&0xf));
924          *out++=hexDigit((uint8_t)((c>>4)&0xf));
925          *out++=hexDigit((uint8_t)(c&0xf));
926      }
927      *out++=0;
928      return out;
929  }
930  
/**
 * Writes each offset as exactly two characters, separated by single spaces,
 * followed by a NUL terminator:
 *   below -9 -> "-x", -9..-1 -> "-1".."-9", 0..9 -> " 0".." 9",
 *   10..99 -> "10".."99", above 99 -> "xx".
 *
 * @param offsets the offsets to print; NULL is treated as an empty array
 * @param length  number of offsets
 * @param out     destination buffer
 * @return pointer just past the written NUL terminator
 */
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count= offsets==NULL ? 0 : length;

    for(int32_t index=0; index<count; ++index) {
        if(index!=0) {
            *out++=' ';
        }

        const int32_t value=offsets[index];
        char first, second;

        if(value>99) {
            first='x';
            second='x';
        } else if(value>=10) {
            first=(char)('0'+value/10);
            second=(char)('0'+value%10);
        } else if(value>=0) {
            first=' ';
            second=(char)('0'+value%10);
        } else if(value>=-9) {
            first='-';
            second=(char)('0'-value);
        } else /* value<-9 */ {
            first='-';
            second='x';
        }

        *out++=first;
        *out++=second;
    }
    *out++=0;
    return out;
}
963  
964  // toUnicode test worker functions ----------------------------------------- ***
965  
966  static int32_t
stepToUnicode(ConversionCase & cc,UConverter * cnv,UChar * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)967  stepToUnicode(ConversionCase &cc, UConverter *cnv,
968                UChar *result, int32_t resultCapacity,
969                int32_t *resultOffsets, /* also resultCapacity */
970                int32_t step,
971                UErrorCode *pErrorCode) {
972      const char *source, *sourceLimit, *bytesLimit;
973      UChar *target, *targetLimit, *resultLimit;
974      UBool flush;
975  
976      source=(const char *)cc.bytes;
977      target=result;
978      bytesLimit=source+cc.bytesLength;
979      resultLimit=result+resultCapacity;
980  
981      if(step>=0) {
982          // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
983          // move only one buffer (in vs. out) at a time to be extra mean
984          // step==0 performs bulk conversion and generates offsets
985  
986          // initialize the partial limits for the loop
987          if(step==0) {
988              // use the entire buffers
989              sourceLimit=bytesLimit;
990              targetLimit=resultLimit;
991              flush=cc.finalFlush;
992          } else {
993              // start with empty partial buffers
994              sourceLimit=source;
995              targetLimit=target;
996              flush=FALSE;
997  
998              // output offsets only for bulk conversion
999              resultOffsets=NULL;
1000          }
1001  
1002          for(;;) {
1003              // resetting the opposite conversion direction must not affect this one
1004              ucnv_resetFromUnicode(cnv);
1005  
1006              // convert
1007              ucnv_toUnicode(cnv,
1008                  &target, targetLimit,
1009                  &source, sourceLimit,
1010                  resultOffsets,
1011                  flush, pErrorCode);
1012  
1013              // check pointers and errors
1014              if(source>sourceLimit || target>targetLimit) {
1015                  *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1016                  break;
1017              } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1018                  if(target!=targetLimit) {
1019                      // buffer overflow must only be set when the target is filled
1020                      *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1021                      break;
1022                  } else if(targetLimit==resultLimit) {
1023                      // not just a partial overflow
1024                      break;
1025                  }
1026  
1027                  // the partial target is filled, set a new limit, reset the error and continue
1028                  targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1029                  *pErrorCode=U_ZERO_ERROR;
1030              } else if(U_FAILURE(*pErrorCode)) {
1031                  // some other error occurred, done
1032                  break;
1033              } else {
1034                  if(source!=sourceLimit) {
1035                      // when no error occurs, then the input must be consumed
1036                      *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1037                      break;
1038                  }
1039  
1040                  if(sourceLimit==bytesLimit) {
1041                      // we are done
1042                      break;
1043                  }
1044  
1045                  // the partial conversion succeeded, set a new limit and continue
1046                  sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
1047                  flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
1048              }
1049          }
1050      } else /* step<0 */ {
1051          /*
1052           * step==-1: call only ucnv_getNextUChar()
1053           * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
1054           *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
1055           *   else give it at most (-step-2)/2 bytes
1056           */
1057          UChar32 c;
1058  
1059          // end the loop by getting an index out of bounds error
1060          for(;;) {
1061              // resetting the opposite conversion direction must not affect this one
1062              ucnv_resetFromUnicode(cnv);
1063  
1064              // convert
1065              if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
1066                  sourceLimit=source; // use sourceLimit not as a real limit
1067                                      // but to remember the pre-getNextUChar source pointer
1068                  c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
1069  
1070                  // check pointers and errors
1071                  if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
1072                      if(source!=bytesLimit) {
1073                          *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1074                      } else {
1075                          *pErrorCode=U_ZERO_ERROR;
1076                      }
1077                      break;
1078                  } else if(U_FAILURE(*pErrorCode)) {
1079                      break;
1080                  }
1081                  // source may not move if c is from previous overflow
1082  
1083                  if(target==resultLimit) {
1084                      *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1085                      break;
1086                  }
1087                  if(c<=0xffff) {
1088                      *target++=(UChar)c;
1089                  } else {
1090                      *target++=U16_LEAD(c);
1091                      if(target==resultLimit) {
1092                          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1093                          break;
1094                      }
1095                      *target++=U16_TRAIL(c);
1096                  }
1097  
1098                  // alternate between -n-1 and -n but leave -1 alone
1099                  if(step<-1) {
1100                      ++step;
1101                  }
1102              } else /* step is even */ {
1103                  // allow only one UChar output
1104                  targetLimit=target<resultLimit ? target+1 : resultLimit;
1105  
1106                  // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
1107                  // and never output offsets
1108                  if(step==-2) {
1109                      sourceLimit=bytesLimit;
1110                  } else {
1111                      sourceLimit=source+(-step-2)/2;
1112                      if(sourceLimit>bytesLimit) {
1113                          sourceLimit=bytesLimit;
1114                      }
1115                  }
1116  
1117                  ucnv_toUnicode(cnv,
1118                      &target, targetLimit,
1119                      &source, sourceLimit,
1120                      NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
1121  
1122                  // check pointers and errors
1123                  if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1124                      if(target!=targetLimit) {
1125                          // buffer overflow must only be set when the target is filled
1126                          *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1127                          break;
1128                      } else if(targetLimit==resultLimit) {
1129                          // not just a partial overflow
1130                          break;
1131                      }
1132  
1133                      // the partial target is filled, set a new limit and continue
1134                      *pErrorCode=U_ZERO_ERROR;
1135                  } else if(U_FAILURE(*pErrorCode)) {
1136                      // some other error occurred, done
1137                      break;
1138                  } else {
1139                      if(source!=sourceLimit) {
1140                          // when no error occurs, then the input must be consumed
1141                          *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1142                          break;
1143                      }
1144  
1145                      // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
1146                  }
1147  
1148                  --step;
1149              }
1150          }
1151      }
1152  
1153      return (int32_t)(target-result);
1154  }
1155  
1156  UBool
ToUnicodeCase(ConversionCase & cc,UConverterToUCallback callback,const char * option)1157  ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
1158      // open the converter
1159      IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
1160      LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
1161      // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
1162      if(errorCode.isFailure()) {
1163          errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1164                  cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
1165          errorCode.reset();
1166          return FALSE;
1167      }
1168  
1169      // set the callback
1170      if(callback!=NULL) {
1171          ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
1172          if(U_FAILURE(errorCode)) {
1173              errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
1174                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1175              return FALSE;
1176          }
1177      }
1178  
1179      int32_t resultOffsets[256];
1180      UChar result[256];
1181      int32_t resultLength;
1182      UBool ok;
1183  
1184      static const struct {
1185          int32_t step;
1186          const char *name;
1187      } steps[]={
1188          { 0, "bulk" }, // must be first for offsets to be checked
1189          { 1, "step=1" },
1190          { 3, "step=3" },
1191          { 7, "step=7" },
1192          { -1, "getNext" },
1193          { -2, "toU(bulk)+getNext" },
1194          { -3, "getNext+toU(bulk)" },
1195          { -4, "toU(1)+getNext" },
1196          { -5, "getNext+toU(1)" },
1197          { -12, "toU(5)+getNext" },
1198          { -13, "getNext+toU(5)" },
1199      };
1200      int32_t i, step;
1201  
1202      ok=TRUE;
1203      for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1204          step=steps[i].step;
1205          if(step<0 && !cc.finalFlush) {
1206              // skip ucnv_getNextUChar() if !finalFlush because
1207              // ucnv_getNextUChar() always implies flush
1208              continue;
1209          }
1210          if(step!=0) {
1211              // bulk test is first, then offsets are not checked any more
1212              cc.offsets=NULL;
1213          }
1214          else {
1215              memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
1216          }
1217          memset(result, -1, UPRV_LENGTHOF(result));
1218          errorCode.reset();
1219          resultLength=stepToUnicode(cc, cnv.getAlias(),
1220                                  result, UPRV_LENGTHOF(result),
1221                                  step==0 ? resultOffsets : NULL,
1222                                  step, errorCode);
1223          ok=checkToUnicode(
1224                  cc, cnv.getAlias(), steps[i].name,
1225                  result, resultLength,
1226                  cc.offsets!=NULL ? resultOffsets : NULL,
1227                  errorCode);
1228          if(errorCode.isFailure() || !cc.finalFlush) {
1229              // reset if an error occurred or we did not flush
1230              // otherwise do nothing to make sure that flushing resets
1231              ucnv_resetToUnicode(cnv.getAlias());
1232          }
1233          if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
1234              errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1235                  cc.caseNr, cc.charset, resultLength);
1236          }
1237          if (result[resultLength] != (UChar)-1) {
1238              errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
1239                  cc.caseNr, cc.charset, resultLength);
1240          }
1241      }
1242  
1243      // not a real loop, just a convenience for breaking out of the block
1244      while(ok && cc.finalFlush) {
1245          // test ucnv_toUChars()
1246          memset(result, 0, sizeof(result));
1247  
1248          errorCode.reset();
1249          resultLength=ucnv_toUChars(cnv.getAlias(),
1250                          result, UPRV_LENGTHOF(result),
1251                          (const char *)cc.bytes, cc.bytesLength,
1252                          errorCode);
1253          ok=checkToUnicode(
1254                  cc, cnv.getAlias(), "toUChars",
1255                  result, resultLength,
1256                  NULL,
1257                  errorCode);
1258          if(!ok) {
1259              break;
1260          }
1261  
1262          // test preflighting
1263          // keep the correct result for simple checking
1264          errorCode.reset();
1265          resultLength=ucnv_toUChars(cnv.getAlias(),
1266                          NULL, 0,
1267                          (const char *)cc.bytes, cc.bytesLength,
1268                          errorCode);
1269          if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
1270              errorCode.reset();
1271          }
1272          ok=checkToUnicode(
1273                  cc, cnv.getAlias(), "preflight toUChars",
1274                  result, resultLength,
1275                  NULL,
1276                  errorCode);
1277          break;
1278      }
1279  
1280      errorCode.reset();  // all errors have already been reported
1281      return ok;
1282  }
1283  
1284  UBool
checkToUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const UChar * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1285  ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1286                                 const UChar *result, int32_t resultLength,
1287                                 const int32_t *resultOffsets,
1288                                 UErrorCode resultErrorCode) {
1289      char resultInvalidChars[8];
1290      int8_t resultInvalidLength;
1291      UErrorCode errorCode;
1292  
1293      const char *msg;
1294  
1295      // reset the message; NULL will mean "ok"
1296      msg=NULL;
1297  
1298      errorCode=U_ZERO_ERROR;
1299      resultInvalidLength=sizeof(resultInvalidChars);
1300      ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
1301      if(U_FAILURE(errorCode)) {
1302          errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
1303                  cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1304          return FALSE;
1305      }
1306  
1307      // check everything that might have gone wrong
1308      if(cc.unicodeLength!=resultLength) {
1309          msg="wrong result length";
1310      } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
1311          msg="wrong result string";
1312      } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
1313          msg="wrong offsets";
1314      } else if(cc.outErrorCode!=resultErrorCode) {
1315          msg="wrong error code";
1316      } else if(cc.invalidLength!=resultInvalidLength) {
1317          msg="wrong length of last invalid input";
1318      } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
1319          msg="wrong last invalid input";
1320      }
1321  
1322      if(msg==NULL) {
1323          return TRUE;
1324      } else {
1325          char buffer[2000]; // one buffer for all strings
1326          char *s, *bytesString, *unicodeString, *resultString,
1327              *offsetsString, *resultOffsetsString,
1328              *invalidCharsString, *resultInvalidCharsString;
1329  
1330          bytesString=s=buffer;
1331          s=printBytes(cc.bytes, cc.bytesLength, bytesString);
1332          s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
1333          s=printUnicode(result, resultLength, resultString=s);
1334          s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
1335          s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1336          s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
1337          s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
1338  
1339          if((s-buffer)>(int32_t)sizeof(buffer)) {
1340              errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
1341                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1342              exit(1);
1343          }
1344  
1345          errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1346                "  bytes <%s>[%d]\n"
1347                " expected <%s>[%d]\n"
1348                "  result  <%s>[%d]\n"
1349                " offsets         <%s>\n"
1350                "  result offsets <%s>\n"
1351                " error code expected %s got %s\n"
1352                "  invalidChars expected <%s> got <%s>\n",
1353                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1354                bytesString, cc.bytesLength,
1355                unicodeString, cc.unicodeLength,
1356                resultString, resultLength,
1357                offsetsString,
1358                resultOffsetsString,
1359                u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1360                invalidCharsString, resultInvalidCharsString);
1361  
1362          return FALSE;
1363      }
1364  }
1365  
1366  // fromUnicode test worker functions --------------------------------------- ***
1367  
1368  static int32_t
stepFromUTF8(ConversionCase & cc,UConverter * utf8Cnv,UConverter * cnv,char * result,int32_t resultCapacity,int32_t step,UErrorCode * pErrorCode)1369  stepFromUTF8(ConversionCase &cc,
1370               UConverter *utf8Cnv, UConverter *cnv,
1371               char *result, int32_t resultCapacity,
1372               int32_t step,
1373               UErrorCode *pErrorCode) {
1374      const char *source, *sourceLimit, *utf8Limit;
1375      UChar pivotBuffer[32];
1376      UChar *pivotSource, *pivotTarget, *pivotLimit;
1377      char *target, *targetLimit, *resultLimit;
1378      UBool flush;
1379  
1380      source=cc.utf8;
1381      pivotSource=pivotTarget=pivotBuffer;
1382      target=result;
1383      utf8Limit=source+cc.utf8Length;
1384      resultLimit=result+resultCapacity;
1385  
1386      // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
1387      // move only one buffer (in vs. out) at a time to be extra mean
1388      // step==0 performs bulk conversion
1389  
1390      // initialize the partial limits for the loop
1391      if(step==0) {
1392          // use the entire buffers
1393          sourceLimit=utf8Limit;
1394          targetLimit=resultLimit;
1395          flush=cc.finalFlush;
1396  
1397          pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
1398      } else {
1399          // start with empty partial buffers
1400          sourceLimit=source;
1401          targetLimit=target;
1402          flush=FALSE;
1403  
1404          // empty pivot is not allowed, make it of length step
1405          pivotLimit=pivotBuffer+step;
1406      }
1407  
1408      for(;;) {
1409          // resetting the opposite conversion direction must not affect this one
1410          ucnv_resetFromUnicode(utf8Cnv);
1411          ucnv_resetToUnicode(cnv);
1412  
1413          // convert
1414          ucnv_convertEx(cnv, utf8Cnv,
1415              &target, targetLimit,
1416              &source, sourceLimit,
1417              pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
1418              FALSE, flush, pErrorCode);
1419  
1420          // check pointers and errors
1421          if(source>sourceLimit || target>targetLimit) {
1422              *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1423              break;
1424          } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1425              if(target!=targetLimit) {
1426                  // buffer overflow must only be set when the target is filled
1427                  *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1428                  break;
1429              } else if(targetLimit==resultLimit) {
1430                  // not just a partial overflow
1431                  break;
1432              }
1433  
1434              // the partial target is filled, set a new limit, reset the error and continue
1435              targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1436              *pErrorCode=U_ZERO_ERROR;
1437          } else if(U_FAILURE(*pErrorCode)) {
1438              if(pivotSource==pivotBuffer) {
1439                  // toUnicode error, should not occur
1440                  // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1441                  break;
1442              } else {
1443                  // fromUnicode error
1444                  // some other error occurred, done
1445                  break;
1446              }
1447          } else {
1448              if(source!=sourceLimit) {
1449                  // when no error occurs, then the input must be consumed
1450                  *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1451                  break;
1452              }
1453  
1454              if(sourceLimit==utf8Limit) {
1455                  // we are done
1456                  if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1457                      // ucnv_convertEx() warns about not terminating the output
1458                      // but ucnv_fromUnicode() does not and so
1459                      // checkFromUnicode() does not expect it
1460                      *pErrorCode=U_ZERO_ERROR;
1461                  }
1462                  break;
1463              }
1464  
1465              // the partial conversion succeeded, set a new limit and continue
1466              sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
1467              flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
1468          }
1469      }
1470  
1471      return (int32_t)(target-result);
1472  }
1473  
1474  static int32_t
stepFromUnicode(ConversionCase & cc,UConverter * cnv,char * result,int32_t resultCapacity,int32_t * resultOffsets,int32_t step,UErrorCode * pErrorCode)1475  stepFromUnicode(ConversionCase &cc, UConverter *cnv,
1476                  char *result, int32_t resultCapacity,
1477                  int32_t *resultOffsets, /* also resultCapacity */
1478                  int32_t step,
1479                  UErrorCode *pErrorCode) {
1480      const UChar *source, *sourceLimit, *unicodeLimit;
1481      char *target, *targetLimit, *resultLimit;
1482      UBool flush;
1483  
1484      source=cc.unicode;
1485      target=result;
1486      unicodeLimit=source+cc.unicodeLength;
1487      resultLimit=result+resultCapacity;
1488  
1489      // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
1490      // move only one buffer (in vs. out) at a time to be extra mean
1491      // step==0 performs bulk conversion and generates offsets
1492  
1493      // initialize the partial limits for the loop
1494      if(step==0) {
1495          // use the entire buffers
1496          sourceLimit=unicodeLimit;
1497          targetLimit=resultLimit;
1498          flush=cc.finalFlush;
1499      } else {
1500          // start with empty partial buffers
1501          sourceLimit=source;
1502          targetLimit=target;
1503          flush=FALSE;
1504  
1505          // output offsets only for bulk conversion
1506          resultOffsets=NULL;
1507      }
1508  
1509      for(;;) {
1510          // resetting the opposite conversion direction must not affect this one
1511          ucnv_resetToUnicode(cnv);
1512  
1513          // convert
1514          ucnv_fromUnicode(cnv,
1515              &target, targetLimit,
1516              &source, sourceLimit,
1517              resultOffsets,
1518              flush, pErrorCode);
1519  
1520          // check pointers and errors
1521          if(source>sourceLimit || target>targetLimit) {
1522              *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1523              break;
1524          } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1525              if(target!=targetLimit) {
1526                  // buffer overflow must only be set when the target is filled
1527                  *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1528                  break;
1529              } else if(targetLimit==resultLimit) {
1530                  // not just a partial overflow
1531                  break;
1532              }
1533  
1534              // the partial target is filled, set a new limit, reset the error and continue
1535              targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1536              *pErrorCode=U_ZERO_ERROR;
1537          } else if(U_FAILURE(*pErrorCode)) {
1538              // some other error occurred, done
1539              break;
1540          } else {
1541              if(source!=sourceLimit) {
1542                  // when no error occurs, then the input must be consumed
1543                  *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1544                  break;
1545              }
1546  
1547              if(sourceLimit==unicodeLimit) {
1548                  // we are done
1549                  break;
1550              }
1551  
1552              // the partial conversion succeeded, set a new limit and continue
1553              sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
1554              flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
1555          }
1556      }
1557  
1558      return (int32_t)(target-result);
1559  }
1560  
1561  UBool
FromUnicodeCase(ConversionCase & cc,UConverterFromUCallback callback,const char * option)1562  ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
1563      UConverter *cnv;
1564      UErrorCode errorCode;
1565  
1566      // open the converter
1567      errorCode=U_ZERO_ERROR;
1568      cnv=cnv_open(cc.charset, errorCode);
1569      if(U_FAILURE(errorCode)) {
1570          errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1571                  cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1572          return FALSE;
1573      }
1574      ucnv_resetToUnicode(utf8Cnv);
1575  
1576      // set the callback
1577      if(callback!=NULL) {
1578          ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
1579          if(U_FAILURE(errorCode)) {
1580              errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1581                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1582              ucnv_close(cnv);
1583              return FALSE;
1584          }
1585      }
1586  
1587      // set the fallbacks flag
1588      // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1589      ucnv_setFallback(cnv, cc.fallbacks);
1590  
1591      // set the subchar
1592      int32_t length;
1593  
1594      if(cc.setSub>0) {
1595          length=(int32_t)strlen(cc.subchar);
1596          ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
1597          if(U_FAILURE(errorCode)) {
1598              errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1599                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1600              ucnv_close(cnv);
1601              return FALSE;
1602          }
1603      } else if(cc.setSub<0) {
1604          ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
1605          if(U_FAILURE(errorCode)) {
1606              errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1607                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1608              ucnv_close(cnv);
1609              return FALSE;
1610          }
1611      }
1612  
1613      // convert unicode to utf8
1614      char utf8[256];
1615      cc.utf8=utf8;
1616      u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
1617                  cc.unicode, cc.unicodeLength,
1618                  &errorCode);
1619      if(U_FAILURE(errorCode)) {
1620          // skip UTF-8 testing of a string with an unpaired surrogate,
1621          // or of one that's too long
1622          // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1623          cc.utf8Length=-1;
1624      }
1625  
1626      int32_t resultOffsets[256];
1627      char result[256];
1628      int32_t resultLength;
1629      UBool ok;
1630  
1631      static const struct {
1632          int32_t step;
1633          const char *name, *utf8Name;
1634      } steps[]={
1635          { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
1636          { 1, "step=1", "utf8 step=1" },
1637          { 3, "step=3", "utf8 step=3" },
1638          { 7, "step=7", "utf8 step=7" }
1639      };
1640      int32_t i, step;
1641  
1642      ok=TRUE;
1643      for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1644          step=steps[i].step;
1645          memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
1646          memset(result, -1, UPRV_LENGTHOF(result));
1647          errorCode=U_ZERO_ERROR;
1648          resultLength=stepFromUnicode(cc, cnv,
1649                                  result, UPRV_LENGTHOF(result),
1650                                  step==0 ? resultOffsets : NULL,
1651                                  step, &errorCode);
1652          ok=checkFromUnicode(
1653                  cc, cnv, steps[i].name,
1654                  (uint8_t *)result, resultLength,
1655                  cc.offsets!=NULL ? resultOffsets : NULL,
1656                  errorCode);
1657          if(U_FAILURE(errorCode) || !cc.finalFlush) {
1658              // reset if an error occurred or we did not flush
1659              // otherwise do nothing to make sure that flushing resets
1660              ucnv_resetFromUnicode(cnv);
1661          }
1662          if (resultOffsets[resultLength] != -1) {
1663              errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1664                  cc.caseNr, cc.charset, resultLength);
1665          }
1666          if (result[resultLength] != (char)-1) {
1667              errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
1668                  cc.caseNr, cc.charset, resultLength);
1669          }
1670  
1671          // bulk test is first, then offsets are not checked any more
1672          cc.offsets=NULL;
1673  
1674          // test direct conversion from UTF-8
1675          if(cc.utf8Length>=0) {
1676              errorCode=U_ZERO_ERROR;
1677              resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
1678                                      result, UPRV_LENGTHOF(result),
1679                                      step, &errorCode);
1680              ok=checkFromUnicode(
1681                      cc, cnv, steps[i].utf8Name,
1682                      (uint8_t *)result, resultLength,
1683                      NULL,
1684                      errorCode);
1685              if(U_FAILURE(errorCode) || !cc.finalFlush) {
1686                  // reset if an error occurred or we did not flush
1687                  // otherwise do nothing to make sure that flushing resets
1688                  ucnv_resetToUnicode(utf8Cnv);
1689                  ucnv_resetFromUnicode(cnv);
1690              }
1691          }
1692      }
1693  
1694      // not a real loop, just a convenience for breaking out of the block
1695      while(ok && cc.finalFlush) {
1696          // test ucnv_fromUChars()
1697          memset(result, 0, sizeof(result));
1698  
1699          errorCode=U_ZERO_ERROR;
1700          resultLength=ucnv_fromUChars(cnv,
1701                          result, UPRV_LENGTHOF(result),
1702                          cc.unicode, cc.unicodeLength,
1703                          &errorCode);
1704          ok=checkFromUnicode(
1705                  cc, cnv, "fromUChars",
1706                  (uint8_t *)result, resultLength,
1707                  NULL,
1708                  errorCode);
1709          if(!ok) {
1710              break;
1711          }
1712  
1713          // test preflighting
1714          // keep the correct result for simple checking
1715          errorCode=U_ZERO_ERROR;
1716          resultLength=ucnv_fromUChars(cnv,
1717                          NULL, 0,
1718                          cc.unicode, cc.unicodeLength,
1719                          &errorCode);
1720          if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
1721              errorCode=U_ZERO_ERROR;
1722          }
1723          ok=checkFromUnicode(
1724                  cc, cnv, "preflight fromUChars",
1725                  (uint8_t *)result, resultLength,
1726                  NULL,
1727                  errorCode);
1728          break;
1729      }
1730  
1731      ucnv_close(cnv);
1732      return ok;
1733  }
1734  
1735  UBool
checkFromUnicode(ConversionCase & cc,UConverter * cnv,const char * name,const uint8_t * result,int32_t resultLength,const int32_t * resultOffsets,UErrorCode resultErrorCode)1736  ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1737                                   const uint8_t *result, int32_t resultLength,
1738                                   const int32_t *resultOffsets,
1739                                   UErrorCode resultErrorCode) {
1740      UChar resultInvalidUChars[8];
1741      int8_t resultInvalidLength;
1742      UErrorCode errorCode;
1743  
1744      const char *msg;
1745  
1746      // reset the message; NULL will mean "ok"
1747      msg=NULL;
1748  
1749      errorCode=U_ZERO_ERROR;
1750      resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
1751      ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
1752      if(U_FAILURE(errorCode)) {
1753          errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1754                  cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1755          return FALSE;
1756      }
1757  
1758      // check everything that might have gone wrong
1759      if(cc.bytesLength!=resultLength) {
1760          msg="wrong result length";
1761      } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
1762          msg="wrong result string";
1763      } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
1764          msg="wrong offsets";
1765      } else if(cc.outErrorCode!=resultErrorCode) {
1766          msg="wrong error code";
1767      } else if(cc.invalidLength!=resultInvalidLength) {
1768          msg="wrong length of last invalid input";
1769      } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
1770          msg="wrong last invalid input";
1771      }
1772  
1773      if(msg==NULL) {
1774          return TRUE;
1775      } else {
1776          char buffer[2000]; // one buffer for all strings
1777          char *s, *unicodeString, *bytesString, *resultString,
1778              *offsetsString, *resultOffsetsString,
1779              *invalidCharsString, *resultInvalidUCharsString;
1780  
1781          unicodeString=s=buffer;
1782          s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
1783          s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
1784          s=printBytes(result, resultLength, resultString=s);
1785          s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
1786          s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1787          s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
1788          s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
1789  
1790          if((s-buffer)>(int32_t)sizeof(buffer)) {
1791              errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1792                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1793              exit(1);
1794          }
1795  
1796          errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1797                "  unicode <%s>[%d]\n"
1798                " expected <%s>[%d]\n"
1799                "  result  <%s>[%d]\n"
1800                " offsets         <%s>\n"
1801                "  result offsets <%s>\n"
1802                " error code expected %s got %s\n"
1803                "  invalidChars expected <%s> got <%s>\n",
1804                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1805                unicodeString, cc.unicodeLength,
1806                bytesString, cc.bytesLength,
1807                resultString, resultLength,
1808                offsetsString,
1809                resultOffsetsString,
1810                u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1811                invalidCharsString, resultInvalidUCharsString);
1812  
1813          return FALSE;
1814      }
1815  }
1816  
1817  #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1818