Lines Matching +full:no +full:- +full:bulk
6 * Copyright (C) 2003-2014, International Business Machines
11 * encoding: UTF-8
18 * Test file for data-driven conversion tests.
26 * is slightly unnecessary - it removes tests for Unicode charsets
27 * like UTF-8 that should work.
28 * However, there is no easy way for the test to detect whether a test case
57 utf8Cnv=ucnv_open("UTF-8", &errorCode); in ConversionTest()
60 errln("unable to open UTF-8 converter"); in ConversionTest()
84 // test data interface ----------------------------------------------------- ***
104 testData=dataModule->createTestData("toUnicode", errorCode); in TestToUnicode()
106 for(i=0; testData->nextCase(testCase, errorCode); ++i) { in TestToUnicode()
108 errln("error retrieving conversion/toUnicode test case %d - %s", in TestToUnicode()
116 s=testCase->getString("charset", errorCode); in TestToUnicode()
120 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); in TestToUnicode()
121 unicode=testCase->getString("unicode", errorCode); in TestToUnicode()
126 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); in TestToUnicode()
135 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); in TestToUnicode()
136 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); in TestToUnicode()
138 s=testCase->getString("errorCode", errorCode); in TestToUnicode()
153 s=testCase->getString("callback", errorCode); in TestToUnicode()
178 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode); in TestToUnicode()
181 errln("error parsing conversion/toUnicode test case %d - %s", in TestToUnicode()
217 testData=dataModule->createTestData("fromUnicode", errorCode); in TestFromUnicode()
219 for(i=0; testData->nextCase(testCase, errorCode); ++i) { in TestFromUnicode()
221 errln("error retrieving conversion/fromUnicode test case %d - %s", in TestFromUnicode()
229 s=testCase->getString("charset", errorCode); in TestFromUnicode()
233 unicode=testCase->getString("unicode", errorCode); in TestFromUnicode()
236 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); in TestFromUnicode()
239 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); in TestFromUnicode()
248 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); in TestFromUnicode()
249 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); in TestFromUnicode()
251 s=testCase->getString("errorCode", errorCode); in TestFromUnicode()
262 s=testCase->getString("callback", errorCode); in TestFromUnicode()
263 cc.setSub=0; // default: no subchar in TestFromUnicode()
266 // read NUL-separated subchar first, if any in TestFromUnicode()
267 // copy the subchar from Latin-1 characters in TestFromUnicode()
272 length=s.length()-length; in TestFromUnicode()
281 // NUL-terminate the subchar in TestFromUnicode()
291 length=s.length()-(index+1); in TestFromUnicode()
296 // NUL-terminate the subString in TestFromUnicode()
298 cc.setSub=-1; in TestFromUnicode()
329 invalidUChars=testCase->getString("invalidUChars", errorCode); in TestFromUnicode()
334 errln("error parsing conversion/fromUnicode test case %d - %s", in TestFromUnicode()
373 testData=dataModule->createTestData("getUnicodeSet", errorCode); in TestGetUnicodeSet()
375 for(i=0; testData->nextCase(testCase, errorCode); ++i) { in TestGetUnicodeSet()
377 errln("error retrieving conversion/getUnicodeSet test case %d - %s", in TestGetUnicodeSet()
383 s=testCase->getString("charset", errorCode); in TestGetUnicodeSet()
386 map=testCase->getString("map", errorCode); in TestGetUnicodeSet()
387 mapnot=testCase->getString("mapnot", errorCode); in TestGetUnicodeSet()
389 which=testCase->getInt28("which", errorCode); in TestGetUnicodeSet()
392 errln("error parsing conversion/getUnicodeSet test case %d - %s", in TestGetUnicodeSet()
405 … errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n" in TestGetUnicodeSet()
415 … errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n" in TestGetUnicodeSet()
426 … errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s", in TestGetUnicodeSet()
432 … ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode); in TestGetUnicodeSet()
435 … errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s", in TestGetUnicodeSet()
448 …errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d", in TestGetUnicodeSet()
460 …errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test… in TestGetUnicodeSet()
484 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point in getUnicodeSetCallback()
537 "UTF-8", in TestGetUnicodeSet2()
538 "UTF-7", in TestGetUnicodeSet2()
539 "UTF-16", in TestGetUnicodeSet2()
540 "US-ASCII", in TestGetUnicodeSet2()
541 "ISO-8859-1", in TestGetUnicodeSet2()
542 "windows-1252", in TestGetUnicodeSet2()
543 "Shift-JIS", in TestGetUnicodeSet2()
544 "ibm-1390", // EBCDIC_STATEFUL table in TestGetUnicodeSet2()
545 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table in TestGetUnicodeSet2()
547 "ISO-2022-JP", in TestGetUnicodeSet2()
549 "ISO-2022-CN", in TestGetUnicodeSet2()
550 "ISO-2022-CN-EXT", in TestGetUnicodeSet2()
560 … errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode)); in TestGetUnicodeSet2()
566 … errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode)); in TestGetUnicodeSet2()
574 expected.add(0, cpLimit-1); in TestGetUnicodeSet2()
623 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d", in TestGetUnicodeSet2()
635 … errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d", in TestGetUnicodeSet2()
646 // Test all codepoints which has the default ignorable Unicode property are ignored if they have no…
651 const char *cnv_name = "euc-jp-2007"; in TestDefaultIgnorableCallback()
657 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status)); in TestDefaultIgnorableCallback()
663 … dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status)); in TestDefaultIgnorableCallback()
669 dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status)); in TestDefaultIgnorableCallback()
681 int size = set_ignorable->size(); in TestDefaultIgnorableCallback()
686 input[0] = set_ignorable->charAt(i); in TestDefaultIgnorableCallback()
688 …cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); in TestDefaultIgnorableCallback()
690 … errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status)); in TestDefaultIgnorableCallback()
694 // test non-ignorables are not ignored in TestDefaultIgnorableCallback()
695 size = set_not_ignorable->size(); in TestDefaultIgnorableCallback()
700 input[0] = set_not_ignorable->charAt(i); in TestDefaultIgnorableCallback()
706 …cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); in TestDefaultIgnorableCallback()
708 …errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status)); in TestDefaultIgnorableCallback()
716 LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); in TestUTF8ToUTF8Overflow()
717 LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); in TestUTF8ToUTF8Overflow()
737 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
747 assertSuccess("UTF-8->UTF-8", errorCode); in TestUTF8ToUTF8Overflow()
748 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
770 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
780 assertSuccess("text2 UTF-8->UTF-8", errorCode); in TestUTF8ToUTF8Overflow()
781 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
807 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
824 length = (int32_t)(target - result); in TestUTF8ToUTF8Overflow()
837 LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); in TestUTF8ToUTF8Streaming()
838 LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); in TestUTF8ToUTF8Streaming()
872 length = (int32_t)(target - result); in TestUTF8ToUTF8Streaming()
873 targetLen -= length; in TestUTF8ToUTF8Streaming()
874 assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length); in TestUTF8ToUTF8Streaming()
885 length = (int32_t)(target - result - length); in TestUTF8ToUTF8Streaming()
886 targetLen -= length; in TestUTF8ToUTF8Streaming()
889 assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen); in TestUTF8ToUTF8Streaming()
890 assertSuccess("UTF-8->UTF-8", errorCode); in TestUTF8ToUTF8Streaming()
893 // open testdata or ICU data converter ------------------------------------- ***
909 // output helpers ---------------------------------------------------------- ***
913 return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit); in hexDigit()
922 --length; in printBytes()
929 --length; in printBytes()
978 // print all offsets with 2 characters each (-x, -9..99, xx) in printOffsets()
979 if(o<-9) { in printOffsets()
980 *out++='-'; in printOffsets()
983 *out++='-'; in printOffsets()
984 *out++=(char)('0'-o); in printOffsets()
997 // toUnicode test worker functions ----------------------------------------- ***
1015 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time in stepToUnicode()
1017 // step==0 performs bulk conversion and generates offsets in stepToUnicode()
1031 // output offsets only for bulk conversion in stepToUnicode()
1061 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; in stepToUnicode()
1068 // when no error occurs, then the input must be consumed in stepToUnicode()
1079 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit; in stepToUnicode()
1085 * step==-1: call only ucnv_getNextUChar() in stepToUnicode()
1087 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input, in stepToUnicode()
1088 * else give it at most (-step-2)/2 bytes in stepToUnicode()
1098 if((step&1)!=0 /* odd: -1, -3, -5, ... */) { in stepToUnicode()
1100 // but to remember the pre-getNextUChar source pointer in stepToUnicode()
1131 // alternate between -n-1 and -n but leave -1 alone in stepToUnicode()
1132 if(step<-1) { in stepToUnicode()
1141 if(step==-2) { in stepToUnicode()
1144 sourceLimit=source+(-step-2)/2; in stepToUnicode()
1173 // when no error occurs, then the input must be consumed in stepToUnicode()
1181 --step; in stepToUnicode()
1186 return (int32_t)(target-result); in stepToUnicode()
1194 …// with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-c… in ToUnicodeCase()
1196 errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", in ToUnicodeCase()
1206 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s", in ToUnicodeCase()
1221 { 0, "bulk" }, // must be first for offsets to be checked in ToUnicodeCase()
1225 { -1, "getNext" }, in ToUnicodeCase()
1226 { -2, "toU(bulk)+getNext" }, in ToUnicodeCase()
1227 { -3, "getNext+toU(bulk)" }, in ToUnicodeCase()
1228 { -4, "toU(1)+getNext" }, in ToUnicodeCase()
1229 { -5, "getNext+toU(1)" }, in ToUnicodeCase()
1230 { -12, "toU(5)+getNext" }, in ToUnicodeCase()
1231 { -13, "getNext+toU(5)" }, in ToUnicodeCase()
1244 // bulk test is first, then offsets are not checked any more in ToUnicodeCase()
1249 resultOffsets[i] = -1; in ToUnicodeCase()
1253 result[i] = -1; in ToUnicodeCase()
1270 if (cc.offsets != NULL && resultOffsets[resultLength] != -1) { in ToUnicodeCase()
1274 if (result[resultLength] != (UChar)-1) { in ToUnicodeCase()
1339 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s", in checkToUnicode()
1376 if((s-buffer)>(int32_t)sizeof(buffer)) { in checkToUnicode()
1378 … cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); in checkToUnicode()
1403 // fromUnicode test worker functions --------------------------------------- ***
1423 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time in stepFromUTF8()
1425 // step==0 performs bulk conversion in stepFromUTF8()
1472 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; in stepFromUTF8()
1486 // when no error occurs, then the input must be consumed in stepFromUTF8()
1503 sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit; in stepFromUTF8()
1508 return (int32_t)(target-result); in stepFromUTF8()
1526 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time in stepFromUnicode()
1528 // step==0 performs bulk conversion and generates offsets in stepFromUnicode()
1542 // output offsets only for bulk conversion in stepFromUnicode()
1572 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; in stepFromUnicode()
1579 // when no error occurs, then the input must be consumed in stepFromUnicode()
1590 sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit; in stepFromUnicode()
1595 return (int32_t)(target-result); in stepFromUnicode()
1607 … errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", in FromUnicodeCase()
1617 … errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s", in FromUnicodeCase()
1635 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s", in FromUnicodeCase()
1641 ucnv_setSubstString(cnv, cc.subString, -1, &errorCode); in FromUnicodeCase()
1643 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s", in FromUnicodeCase()
1657 // skip UTF-8 testing of a string with an unpaired surrogate, in FromUnicodeCase()
1660 cc.utf8Length=-1; in FromUnicodeCase()
1672 { 0, "bulk", "utf8" }, // must be first for offsets to be checked in FromUnicodeCase()
1683 resultOffsets[i] = -1; in FromUnicodeCase()
1686 result[i] = -1; in FromUnicodeCase()
1703 if (resultOffsets[resultLength] != -1) { in FromUnicodeCase()
1707 if (result[resultLength] != (char)-1) { in FromUnicodeCase()
1712 // bulk test is first, then offsets are not checked any more in FromUnicodeCase()
1715 // test direct conversion from UTF-8 in FromUnicodeCase()
1794 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s", in checkFromUnicode()
1831 if((s-buffer)>(int32_t)sizeof(buffer)) { in checkFromUnicode()
1833 … cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); in checkFromUnicode()