// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2016, International Business Machines Corporation and * others. All Rights Reserved. ***************************************************************************/ /***************************************************************************** * * File NCNVCBTS * * Modification History: * Name Date Description * Madhu Katragadda 06/23/2000 Tests for Conveter FallBack API and Functionality ****************************************************************************** */ #include #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "cintltst.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "ncnvfbts.h" #include "cmemory.h" #include "cstring.h" #if !UCONFIG_NO_LEGACY_CONVERSION #define NEW_MAX_BUFFER 999 #define nct_min(x,y) ((x %d chars out]. 
\nResult :", sourceLen, targ-junkout); if(getTestOption(VERBOSITY_OPTION)) { char junk[9999]; char offset_str[9999]; junk[0] = 0; offset_str[0] = 0; for(p = junkout;p %d chars.\nResult :", sourcelen, targ-junkout); if(getTestOption(VERBOSITY_OPTION)) { junk[0] = 0; offset_str[0] = 0; for(p = junkout;p %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]); if(!testConvertToUnicode(expectedNative, sizeof(expectedNative), retrievedSBCSText, UPRV_LENGTHOF(retrievedSBCSText), nativeCodePage[i], TRUE, fromNativeoffs )) log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]); } /*DBCS*/ if(!testConvertFromUnicode(DBCSText, UPRV_LENGTHOF(DBCSText), expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", TRUE, toIBM1363Offs_DBCS )) log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n"); if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), retrievedDBCSText, UPRV_LENGTHOF(retrievedDBCSText),"ibm-1363", TRUE, fromIBM1363offs_DBCS )) log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n"); /*MBCS*/ if(!testConvertFromUnicode(MBCSText, UPRV_LENGTHOF(MBCSText), expectedIBM950, sizeof(expectedIBM950), "ibm-950", TRUE, toIBM950Offs )) log_err("u-> ibm-950(MBCS) with FallBack did not match.\n"); if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950), retrievedMBCSText, UPRV_LENGTHOF(retrievedMBCSText),"ibm-950", TRUE, fromIBM950offs )) log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); /*commented untill data table is available*/ log_verbose("toUnicode fallback with fallback data for MBCS\n"); { const uint8_t IBM950input[] = { 0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b, 0xf9, 0x92, 0xdc, 0xb0, }; UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; int32_t fromIBM950inputOffs [] = { 0, 2, 4, 6, 8, 10}; /* for testing reverse fallback behavior */ UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; 
/*
 * Tail of the converter fallback test body (the function header and the
 * outer-scope tables are above this point).
 *
 * Every check below has the same shape: call testConvertFromUnicode() or
 * testConvertToUnicode() with an input array, the expected output array, a
 * converter name, a TRUE/FALSE flag and an array of expected offsets, and
 * report through log_err() when the helper returns a false value.
 * The log messages indicate that the TRUE/FALSE argument selects fallback
 * behaviour; the helpers themselves are not visible here, so confirm
 * against their definitions.
 */
        /* Still inside the "toUnicode fallback ... for MBCS" scope opened just
         * above: decode the ibm-950 bytes with the flag set to TRUE. */
        if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
                expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"ibm-950", TRUE, fromIBM950inputOffs ))
            log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
        /* Same input with the flag set to FALSE.
         * NOTE(review): this failure message is identical to the TRUE case
         * above, so the two failures cannot be told apart in the log. */
        if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
                expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"ibm-950", FALSE, fromIBM950inputOffs ))
            log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
    }

    /* euc-tw to Unicode, once with the flag TRUE and once with FALSE. */
    log_verbose("toUnicode fallback with fallback data for euc-tw\n");
    {
        /* Six byte sequences; the rows follow the start offsets listed in
         * from_euc_tw_offs (0, 2, 6, 8, 10, 12). */
        const uint8_t euc_tw_input[] = {
            0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB,
            0xA8, 0xC7, 0xC8, 0xDE,
            0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,};
        /* Each adjacent pair of byte sequences is expected to decode to the
         * same UChar. */
        UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
        int32_t from_euc_tw_offs [] = { 0, 2, 6, 8, 10, 12};
        /* for testing reverse fallback behavior */
        /* The contents are identical to expectedUnicodeText, i.e. the same
         * output is expected whether the flag is TRUE or FALSE. */
        UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};

        if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
                expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"euc-tw", TRUE, from_euc_tw_offs ))
            log_err("from euc-tw->u with Fallback did not match.\n");

        if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
                expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"euc-tw", FALSE, from_euc_tw_offs ))
            log_err("from euc-tw->u with Fallback false did not match.\n");
    }

    /* Unicode to euc-tw with the flag TRUE. */
    log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n");
    {
        /* 13 input UChars (indices 0..12). */
        UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296, 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC};
        /* 36 expected output bytes. */
        const uint8_t expected_euc_tw[] = { 0x01, 0x1a, 0xa2, 0xa3, 0xa2, 0xde, 0xa2, 0xde, 0x8e, 0xa2, 0xe5, 0xb9, 0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab, 0xc8, 0xde, 0xc8, 0xde, 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xac, 0xc6, 0xf7};
        /* One entry per expected output byte: the index into inputText of the
         * UChar that byte is attributed to. */
        int32_t to_euc_tw_offs [] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12};

        if(!testConvertFromUnicode(inputText, UPRV_LENGTHOF(inputText),
                expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", TRUE, to_euc_tw_offs ))
            log_err("u-> euc-tw with FallBack did not match.\n");
    }

    /*MBCS 1363*/
    /* Convert MBCSText1363 to ibm-1363, then decode the expected bytes back
     * and compare against retrievedMBCSText1363. All tables used here are
     * declared earlier in the enclosing function. */
    if(!testConvertFromUnicode(MBCSText1363, UPRV_LENGTHOF(MBCSText1363),
            expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", TRUE, toIBM1363Offs ))
        log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n");

    if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363),
            retrievedMBCSText1363, UPRV_LENGTHOF(retrievedMBCSText1363),"ibm-1363", TRUE, fromIBM1363offs ))
        log_err("ibm-1363->u(MBCS) with Fallback did not match.\n");

    /* More tests to increase the code coverage in MBCS.
       Uses the test converter "@test1"; per the original note it is built
       from test1.ucm, a test file for MBCS conversion with single-byte
       codepage data. */
    {
        /* MBCS with single byte codepage data test1.ucm */
        /* from-Unicode direction: 8 input UChars, 6 expected bytes. */
        const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003};
        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,};
        int32_t totest1Offs[] = { 0, 1, 2, 3, 5, 7};

        /* to-Unicode direction: 6 input bytes. */
        const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe};
        /* NOTE(review): expectedUnicode lists 8 code units but this table has
         * only 7 offsets. Whether the helper reads one offset per expected
         * unit is not visible here; confirm the table is long enough. */
        int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4,5};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest1, sizeof(expectedtest1), "@test1", TRUE, totest1Offs ))
            log_err("u-> test1(MBCS conversion with single-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test1input, sizeof(test1input),
                expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test1", TRUE, fromtest1Offs ))
            log_err("test1(MBCS conversion with single-byte) -> u did not match.\n");
    }

    /* More tests to increase the code coverage in MBCS.
       Uses the test converter "@test3"; per the original note it is built
       from test3.ucm, a test file for MBCS conversion with three-byte
       codepage data. */
    {
        /* MBCS with three byte codepage data test3.ucm */
        /* from-Unicode direction: 12 input UChars, 13 expected bytes with one
         * offset per byte. */
        const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, };
        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b, 0x01, 0x02, 0x0a, 0xff, 0xff,};
        int32_t totest3Offs[] = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10, 11};

        /* to-Unicode direction: 20 input bytes, 13 expected UChars with one
         * offset per UChar. */
        const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,};
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd };
        int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest3, sizeof(expectedtest3), "@test3", TRUE, totest3Offs ))
            log_err("u-> test3(MBCS conversion with three-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test3input, sizeof(test3input),
                expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", TRUE, fromtest3Offs ))
            log_err("test3(MBCS conversion with three-byte) -> u did not match.\n");
    }

    /* More tests to increase the code coverage in MBCS.
       Uses the test converter "@test4"; per the original note it is built
       from test4.ucm, a test file for MBCS conversion with four-byte
       codepage data. */
    {
        /* MBCS with four byte codepage data test4.ucm */
        /* from-Unicode direction: 14 input UChars, 16 expected bytes with one
         * offset per byte. */
        const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f};
        const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0xff, 0x01, 0x02, 0x03, 0x0a, 0xff, 0xff, 0xff};
        int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13};

        /* to-Unicode direction: 26 input bytes, 15 expected UChars with one
         * offset per UChar. */
        const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x08, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
        int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest4, sizeof(expectedtest4), "@test4", TRUE, totest4Offs ))
            log_err("u-> test4(MBCS conversion with four-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test4input, sizeof(test4input),
                expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", TRUE, fromtest4Offs ))
            log_err("test4(MBCS conversion with four-byte) -> u did not match.\n");
    }

    /* Test for jitterbug 509: EBCDIC_STATEFUL converters (ibm-1371). */
    {
        /* from-Unicode direction: 6 input UChars, 15 expected bytes with one
         * offset per byte. */
        const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E };
        const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
        int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 };

        /* to-Unicode direction: the same bytes as expectedtest1 without the
         * trailing 0x0f. The expected offsets (1, 3, 5, 8, 10, 12) never point
         * at the 0x0E / 0x0F bytes - presumably those are the shift-out /
         * shift-in state bytes; confirm against the converter data. */
        const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
        /* Decoding is not expected to give back unicodeInput: only 0x004C is
         * the same in both tables. */
        const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c };
        int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 };

        /*from Unicode*/
        /* NOTE(review): the two messages below say "single-byte" and end in
         * ".," - they look copied from the @test1 block; left unchanged. */
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs ))
            log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test1input, sizeof(test1input),
                expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "ibm-1371", TRUE, fromtest1Offs ))
            log_err("ibm-1371(MBCS conversion with single-byte) -> u did not match.,\n");
    }
}
#endif