1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderMalfunctionError; 20 import java.nio.charset.CoderResult; 21 import java.nio.charset.CodingErrorAction; 22 import java.nio.charset.UnsupportedCharsetException; 23 import java.nio.charset.spi.CharsetProvider; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.Iterator; 27 import java.util.MissingResourceException; 28 import java.util.Set; 29 import java.util.SortedMap; 30 31 import org.junit.Test; 32 import org.junit.runner.RunWith; 33 import org.junit.runners.JUnit4; 34 35 import com.ibm.icu.charset.CharsetCallback; 36 import com.ibm.icu.charset.CharsetDecoderICU; 37 import com.ibm.icu.charset.CharsetEncoderICU; 38 import com.ibm.icu.charset.CharsetICU; 39 import com.ibm.icu.charset.CharsetProviderICU; 40 import com.ibm.icu.dev.test.TestFmwk; 41 import com.ibm.icu.text.UTF16; 42 import com.ibm.icu.text.UnicodeSet; 43 import com.ibm.icu.text.UnicodeSetIterator; 44 45 @RunWith(JUnit4.class) 46 public class TestCharset extends TestFmwk { 47 @Test TestUTF16Converter()48 public void TestUTF16Converter(){ 49 CharsetProvider icu = new CharsetProviderICU(); 50 Charset cs1 = icu.charsetForName("UTF-16BE"); 51 CharsetEncoder e1 = cs1.newEncoder(); 52 CharsetDecoder d1 = cs1.newDecoder(); 53 54 Charset cs2 = icu.charsetForName("UTF-16LE"); 55 CharsetEncoder e2 = cs2.newEncoder(); 56 CharsetDecoder d2 = cs2.newDecoder(); 57 58 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 59 CharBuffer us = CharBuffer.allocate(0xFF*2); 60 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 61 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 62 for(int j=0;j<0xFF; j++){ 63 int c = i+j; 64 65 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 66 continue; 67 } 68 69 if(c>0xFFFF){ 70 char lead = UTF16.getLeadSurrogate(c); 71 char trail = UTF16.getTrailSurrogate(c); 72 if(!UTF16.isLeadSurrogate(lead)){ 73 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 74 continue; 75 } 76 if(!UTF16.isTrailSurrogate(trail)){ 77 errln("trail is not trail!"+trail); 78 continue; 79 } 80 us.put(lead); 81 us.put(trail); 82 bs1.put((byte)(lead>>8)); 83 bs1.put((byte)(lead&0xFF)); 84 bs1.put((byte)(trail>>8)); 85 bs1.put((byte)(trail&0xFF)); 86 87 bs2.put((byte)(lead&0xFF)); 88 bs2.put((byte)(lead>>8)); 89 bs2.put((byte)(trail&0xFF)); 90 bs2.put((byte)(trail>>8)); 91 }else{ 92 93 if(c<0xFF){ 94 bs1.put((byte)0x00); 95 bs1.put((byte)(c)); 96 bs2.put((byte)(c)); 97 bs2.put((byte)0x00); 98 }else{ 99 bs1.put((byte)(c>>8)); 100 bs1.put((byte)(c&0xFF)); 101 102 bs2.put((byte)(c&0xFF)); 103 bs2.put((byte)(c>>8)); 104 } 105 us.put((char)c); 106 } 107 } 108 109 110 us.limit(us.position()); 111 us.position(0); 112 if(us.length()==0){ 113 continue; 114 } 115 116 117 bs1.limit(bs1.position()); 118 bs1.position(0); 119 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 120 //newBS.put((byte)0xFE); 121 //newBS.put((byte)0xFF); 122 newBS.put(bs1); 123 bs1.position(0); 124 smBufDecode(d1, "UTF-16", bs1, us); 125 smBufEncode(e1, "UTF-16", us, newBS); 126 127 bs2.limit(bs2.position()); 128 bs2.position(0); 129 newBS.clear(); 130 //newBS.put((byte)0xFF); 131 //newBS.put((byte)0xFE); 132 newBS.put(bs2); 133 bs2.position(0); 134 smBufDecode(d2, "UTF16-LE", bs2, us); 135 smBufEncode(e2, "UTF-16LE", us, newBS); 136 137 } 138 } 139 140 @Test TestUTF32Converter()141 public void TestUTF32Converter(){ 142 CharsetProvider icu = new CharsetProviderICU(); 143 Charset cs1 = icu.charsetForName("UTF-32BE"); 144 CharsetEncoder e1 = cs1.newEncoder(); 145 CharsetDecoder d1 = cs1.newDecoder(); 146 147 Charset cs2 = icu.charsetForName("UTF-32LE"); 148 CharsetEncoder e2 = cs2.newEncoder(); 149 CharsetDecoder d2 = cs2.newDecoder(); 150 151 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 152 CharBuffer us = CharBuffer.allocate(0xFF*2); 153 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 154 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 155 for(int j=0;j<0xFF; j++){ 156 int c = i+j; 157 158 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 159 continue; 160 } 161 162 if(c>0xFFFF){ 163 char lead = UTF16.getLeadSurrogate(c); 164 char trail = UTF16.getTrailSurrogate(c); 165 166 us.put(lead); 167 us.put(trail); 168 }else{ 169 us.put((char)c); 170 } 171 bs1.put((byte) (c >>> 24)); 172 bs1.put((byte) (c >>> 16)); 173 bs1.put((byte) (c >>> 8)); 174 bs1.put((byte) (c & 0xFF)); 175 176 bs2.put((byte) (c & 0xFF)); 177 bs2.put((byte) (c >>> 8)); 178 bs2.put((byte) (c >>> 16)); 179 bs2.put((byte) (c >>> 24)); 180 } 181 bs1.limit(bs1.position()); 182 bs1.position(0); 183 bs2.limit(bs2.position()); 184 bs2.position(0); 185 us.limit(us.position()); 186 us.position(0); 187 if(us.length()==0){ 188 continue; 189 } 190 191 192 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 193 194 newBS.put((byte)0x00); 195 newBS.put((byte)0x00); 196 newBS.put((byte)0xFE); 197 newBS.put((byte)0xFF); 198 199 newBS.put(bs1); 200 bs1.position(0); 201 smBufDecode(d1, "UTF-32", bs1, us); 202 smBufEncode(e1, "UTF-32", us, newBS); 203 204 205 newBS.clear(); 206 207 newBS.put((byte)0xFF); 208 newBS.put((byte)0xFE); 209 newBS.put((byte)0x00); 210 newBS.put((byte)0x00); 211 212 newBS.put(bs2); 213 bs2.position(0); 214 smBufDecode(d2, "UTF-32LE", bs2, us); 215 smBufEncode(e2, "UTF-32LE", us, newBS); 216 217 } 218 } 219 220 @Test TestASCIIConverter()221 public void TestASCIIConverter() { 222 runTestASCIIBasedConverter("ASCII", 0x80); 223 } 224 225 @Test Test88591Converter()226 public void Test88591Converter() { 227 runTestASCIIBasedConverter("iso-8859-1", 0x100); 228 } 229 runTestASCIIBasedConverter(String converter, int limit)230 public void runTestASCIIBasedConverter(String converter, int limit){ 231 CharsetProvider icu = new CharsetProviderICU(); 232 Charset icuChar = icu.charsetForName(converter); 233 CharsetEncoder encoder = icuChar.newEncoder(); 234 CharsetDecoder decoder = icuChar.newDecoder(); 235 CoderResult cr; 236 237 /* test with and without array-backed buffers */ 238 239 byte[] bytes = new byte[0x10000]; 240 char[] chars = new char[0x10000]; 241 for (int j = 0; j <= 0xffff; j++) { 242 bytes[j] = (byte) j; 243 chars[j] = (char) j; 244 } 245 246 boolean fail = false; 247 boolean arrays = false; 248 boolean decoding = false; 249 int i; 250 251 // 0 thru limit - 1 252 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 253 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 254 smBufDecode(decoder, converter, bs, us, true); 255 smBufDecode(decoder, converter, bs, us, false); 256 smBufEncode(encoder, converter, us, bs, true); 257 smBufEncode(encoder, converter, us, bs, false); 258 for (i = 0; i < limit; i++) { 259 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 260 us = CharBuffer.wrap(chars, i, 1).slice(); 261 try { 262 decoding = true; 263 arrays = true; 264 smBufDecode(decoder, converter, bs, us, true, false, true); 265 266 decoding = true; 267 arrays = false; 268 smBufDecode(decoder, converter, bs, us, true, false, false); 269 270 decoding = false; 271 arrays = true; 272 smBufEncode(encoder, converter, us, bs, true, false, true); 273 274 decoding = false; 275 arrays = false; 276 smBufEncode(encoder, converter, us, bs, true, false, false); 277 278 } catch (Exception ex) { 279 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 280 + Integer.toHexString(i) + (arrays ? " with arrays" : " without arrays") + " in " + converter); 281 return; 282 } 283 } 284 285 // decode limit thru 255 286 for (i = limit; i <= 0xff; i++) { 287 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 288 us = CharBuffer.wrap(chars, i, 1).slice(); 289 try { 290 smBufDecode(decoder, converter, bs, us, true, false, true); 291 fail = true; 292 arrays = true; 293 break; 294 } catch (Exception ex) { 295 } 296 try { 297 smBufDecode(decoder, converter, bs, us, true, false, false); 298 fail = true; 299 arrays = false; 300 break; 301 } catch (Exception ex) { 302 } 303 } 304 if (fail) { 305 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 306 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 307 return; 308 } 309 310 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 311 // time (it would take too much time to test every possible case) 312 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 313 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 314 us = CharBuffer.wrap(chars, i, 1).slice(); 315 try { 316 smBufEncode(encoder, converter, us, bs, true, false, true); 317 fail = true; 318 arrays = true; 319 break; 320 } catch (Exception ex) { 321 } 322 try { 323 smBufEncode(encoder, converter, us, bs, true, false, false); 324 fail = true; 325 arrays = false; 326 break; 327 } catch (Exception ex) { 328 } 329 } 330 if (fail) { 331 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 332 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 333 return; 334 } 335 336 // test overflow / underflow edge cases 337 outer: for (int n = 1; n <= 3; n++) { 338 for (int m = 0; m < n; m++) { 339 // expecting underflow 340 try { 341 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 342 us = CharBuffer.wrap(chars, 'a', m).slice(); 343 smBufDecode(decoder, converter, bs, us, true, false, true); 344 smBufDecode(decoder, converter, bs, us, true, false, false); 345 smBufEncode(encoder, converter, us, bs, true, false, true); 346 smBufEncode(encoder, converter, us, bs, true, false, false); 347 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 348 us = CharBuffer.wrap(chars, 'a', n).slice(); 349 smBufDecode(decoder, converter, bs, us, true, false, true, m); 350 smBufDecode(decoder, converter, bs, us, true, false, false, m); 351 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 352 us = CharBuffer.wrap(chars, 'a', m).slice(); 353 smBufEncode(encoder, converter, us, bs, true, false, true, m); 354 smBufEncode(encoder, converter, us, bs, true, false, false, m); 355 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 356 us = CharBuffer.wrap(chars, 'a', n).slice(); 357 smBufDecode(decoder, converter, bs, us, true, false, true); 358 smBufDecode(decoder, converter, bs, us, true, false, false); 359 smBufEncode(encoder, converter, us, bs, true, false, true); 360 smBufEncode(encoder, converter, us, bs, true, false, false); 361 } catch (Exception ex) { 362 fail = true; 363 break outer; 364 } 365 366 // expecting overflow 367 try { 368 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 369 us = CharBuffer.wrap(chars, 'a', m).slice(); 370 smBufDecode(decoder, converter, bs, us, true, false, true); 371 fail = true; 372 break; 373 } catch (Exception ex) { 374 if (!(ex instanceof BufferOverflowException)) { 375 fail = true; 376 break outer; 377 } 378 } 379 try { 380 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 381 us = CharBuffer.wrap(chars, 'a', m).slice(); 382 smBufDecode(decoder, converter, bs, us, true, false, false); 383 fail = true; 384 } catch (Exception ex) { 385 if (!(ex instanceof BufferOverflowException)) { 386 fail = true; 387 break outer; 388 } 389 } 390 try { 391 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 392 us = CharBuffer.wrap(chars, 'a', n).slice(); 393 smBufEncode(encoder, converter, us, bs, true, false, true); 394 fail = true; 395 } catch (Exception ex) { 396 if (!(ex instanceof BufferOverflowException)) { 397 fail = true; 398 break outer; 399 } 400 } 401 try { 402 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 403 us = CharBuffer.wrap(chars, 'a', n).slice(); 404 smBufEncode(encoder, converter, us, bs, true, false, false); 405 fail = true; 406 } catch (Exception ex) { 407 if (!(ex instanceof BufferOverflowException)) { 408 fail = true; 409 break outer; 410 } 411 } 412 } 413 } 414 if (fail) { 415 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 416 return; 417 } 418 419 // test surrogate combinations in encoding 420 String lead = "\ud888"; 421 String trail = "\udc88"; 422 String norm = "a"; 423 String ext = "\u0275"; // theta 424 String end = ""; 425 bs = ByteBuffer.wrap(new byte[] { 0 }); 426 String[] input = new String[] { // 427 lead + lead, // malf(1) 428 lead + trail, // unmap(2) 429 lead + norm, // malf(1) 430 lead + ext, // malf(1) 431 lead + end, // malf(1) 432 trail + norm, // malf(1) 433 trail + end, // malf(1) 434 ext + norm, // unmap(1) 435 ext + end, // unmap(1) 436 }; 437 CoderResult[] result = new CoderResult[] { 438 CoderResult.malformedForLength(1), 439 CoderResult.unmappableForLength(2), 440 CoderResult.malformedForLength(1), 441 CoderResult.malformedForLength(1), 442 CoderResult.malformedForLength(1), 443 CoderResult.malformedForLength(1), 444 CoderResult.malformedForLength(1), 445 CoderResult.unmappableForLength(1), 446 CoderResult.unmappableForLength(1), 447 }; 448 449 for (int index = 0; index < input.length; index++) { 450 CharBuffer source = CharBuffer.wrap(input[index]); 451 cr = encoder.encode(source, bs, true); 452 bs.rewind(); 453 encoder.reset(); 454 455 // if cr != results[x] 456 if (!((cr.isUnderflow() && result[index].isUnderflow()) 457 || (cr.isOverflow() && result[index].isOverflow()) 458 || (cr.isMalformed() && result[index].isMalformed()) 459 || (cr.isUnmappable() && result[index].isUnmappable())) 460 || (cr.isError() && cr.length() != result[index].length())) { 461 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 462 break; 463 } 464 465 source = CharBuffer.wrap(input[index].toCharArray()); 466 cr = encoder.encode(source, bs, true); 467 bs.rewind(); 468 encoder.reset(); 469 470 // if cr != results[x] 471 if (!((cr.isUnderflow() && result[index].isUnderflow()) 472 || (cr.isOverflow() && result[index].isOverflow()) 473 || (cr.isMalformed() && result[index].isMalformed()) 474 || (cr.isUnmappable() && result[index].isUnmappable())) 475 || (cr.isError() && cr.length() != result[index].length())) { 476 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 477 break; 478 } 479 } 480 } 481 482 @Test TestUTF8Converter()483 public void TestUTF8Converter() { 484 String converter = "UTF-8"; 485 CharsetProvider icu = new CharsetProviderICU(); 486 Charset icuChar = icu.charsetForName(converter); 487 CharsetEncoder encoder = icuChar.newEncoder(); 488 CharsetDecoder decoder = icuChar.newDecoder(); 489 ByteBuffer bs; 490 CharBuffer us; 491 CoderResult cr; 492 493 494 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 495 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 496 char[] chars = new char[size[0] + size[1] + size[2]]; 497 int i = 0; 498 int x, y; 499 500 // 0 to 1 << 7 (1 byters) 501 for (; i < size[0]; i++) { 502 bytes[i] = (byte) i; 503 chars[i] = (char) i; 504 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 505 us = CharBuffer.wrap(chars, i, 1).slice(); 506 try { 507 smBufDecode(decoder, converter, bs, us, true, false, true); 508 smBufDecode(decoder, converter, bs, us, true, false, false); 509 smBufEncode(encoder, converter, us, bs, true, false, true); 510 smBufEncode(encoder, converter, us, bs, true, false, false); 511 } catch (Exception ex) { 512 errln("Incorrect result in " + converter + " for 0x" 513 + Integer.toHexString(i)); 514 break; 515 } 516 } 517 518 // 1 << 7 to 1 << 11 (2 byters) 519 for (; i < size[1]; i++) { 520 x = size[0] + i*2; 521 y = size[0] + i; 522 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 523 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 524 chars[y] = (char) i; 525 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 526 us = CharBuffer.wrap(chars, y, 1).slice(); 527 try { 528 smBufDecode(decoder, converter, bs, us, true, false, true); 529 smBufDecode(decoder, converter, bs, us, true, false, false); 530 smBufEncode(encoder, converter, us, bs, true, false, true); 531 smBufEncode(encoder, converter, us, bs, true, false, false); 532 } catch (Exception ex) { 533 errln("Incorrect result in " + converter + " for 0x" 534 + Integer.toHexString(i)); 535 break; 536 } 537 } 538 539 // 1 << 11 to 1 << 16 (3 byters and surrogates) 540 for (; i < size[2]; i++) { 541 x = size[0] + size[1] * 2 + i * 3; 542 y = size[0] + size[1] + i; 543 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 544 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 545 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 546 chars[y] = (char) i; 547 if (!UTF16.isSurrogate(i)) { 548 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 549 us = CharBuffer.wrap(chars, y, 1).slice(); 550 try { 551 smBufDecode(decoder, converter, bs, us, true, false, true); 552 smBufDecode(decoder, converter, bs, us, true, false, false); 553 smBufEncode(encoder, converter, us, bs, true, false, true); 554 smBufEncode(encoder, converter, us, bs, true, false, false); 555 } catch (Exception ex) { 556 errln("Incorrect result in " + converter + " for 0x" 557 + Integer.toHexString(i)); 558 break; 559 } 560 } else { 561 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 562 us = CharBuffer.wrap(chars, y, 1).slice(); 563 564 decoder.reset(); 565 cr = decoder.decode(bs, us, true); 566 bs.rewind(); 567 us.rewind(); 568 if (!cr.isMalformed() || cr.length() != 1) { 569 errln("Incorrect result in " + converter + " decoder for 0x" 570 + Integer.toHexString(i) + " received " + cr); 571 break; 572 } 573 encoder.reset(); 574 cr = encoder.encode(us, bs, true); 575 bs.rewind(); 576 us.rewind(); 577 if (!cr.isMalformed() || cr.length() != 1) { 578 errln("Incorrect result in " + converter + " encoder for 0x" 579 + Integer.toHexString(i) + " received " + cr); 580 break; 581 } 582 583 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 584 us = CharBuffer.wrap(new String(chars, y, 1)); 585 586 decoder.reset(); 587 cr = decoder.decode(bs, us, true); 588 bs.rewind(); 589 us.rewind(); 590 if (!cr.isMalformed() || cr.length() != 1) { 591 errln("Incorrect result in " + converter + " decoder for 0x" 592 + Integer.toHexString(i) + " received " + cr); 593 break; 594 } 595 encoder.reset(); 596 cr = encoder.encode(us, bs, true); 597 bs.rewind(); 598 us.rewind(); 599 if (!cr.isMalformed() || cr.length() != 1) { 600 errln("Incorrect result in " + converter + " encoder for 0x" 601 + Integer.toHexString(i) + " received " + cr); 602 break; 603 } 604 605 606 } 607 } 608 if (true) 609 return; 610 } 611 612 @Test TestHZ()613 public void TestHZ() { 614 /* test input */ 615 char[] in = new char[] { 616 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 617 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 618 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 619 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 620 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 621 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 622 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 623 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 624 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 625 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 626 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 627 0x005A, 0x005B, 0x005C, 0x000A 628 }; 629 630 String converter = "HZ"; 631 CharsetProvider icu = new CharsetProviderICU(); 632 Charset icuChar = icu.charsetForName(converter); 633 CharsetEncoder encoder = icuChar.newEncoder(); 634 CharsetDecoder decoder = icuChar.newDecoder(); 635 try { 636 CharBuffer start = CharBuffer.wrap(in); 637 ByteBuffer bytes = encoder.encode(start); 638 CharBuffer finish = decoder.decode(bytes); 639 640 if (!equals(start, finish)) { 641 errln(converter + " roundtrip test failed: start does not match finish"); 642 643 char[] finishArray = new char[finish.limit()]; 644 for (int i=0; i<finishArray.length; i++) 645 finishArray[i] = finish.get(i); 646 647 logln("start: " + hex(in)); 648 logln("finish: " + hex(finishArray)); 649 } 650 } catch (CharacterCodingException ex) { 651 errln(converter + " roundtrip test failed: " + ex.getMessage()); 652 ex.printStackTrace(System.err); 653 } 654 655 /* For better code coverage */ 656 CoderResult result = CoderResult.UNDERFLOW; 657 byte byteout[] = { 658 (byte)0x7e, (byte)0x7d, (byte)0x41, 659 (byte)0x7e, (byte)0x7b, (byte)0x21, 660 }; 661 char charin[] = { 662 (char)0x0041, (char)0x0042, (char)0x3000 663 }; 664 ByteBuffer bb = ByteBuffer.wrap(byteout); 665 CharBuffer cb = CharBuffer.wrap(charin); 666 int testLoopSize = 5; 667 int bbLimits[] = { 0, 1, 3, 4, 6}; 668 int bbPositions[] = { 0, 0, 0, 3, 3 }; 669 int ccPositions[] = { 0, 0, 0, 2, 2 }; 670 for (int i = 0; i < testLoopSize; i++) { 671 encoder.reset(); 672 bb.limit(bbLimits[i]); 673 bb.position(bbPositions[i]); 674 cb.position(ccPositions[i]); 675 result = encoder.encode(cb, bb, true); 676 677 if (i < 3) { 678 if (!result.isOverflow()) { 679 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 680 } 681 } else { 682 if (result.isError()) { 683 errln("Error should not have occurred while encoding HZ.(" + i + ")"); 684 } 685 } 686 } 687 } 688 689 @Test TestUTF8Surrogates()690 public void TestUTF8Surrogates() { 691 byte[][] in = new byte[][] { 692 { (byte)0x61, }, 693 { (byte)0xc2, (byte)0x80, }, 694 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 695 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 696 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 697 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 698 }; 699 700 /* expected test results */ 701 char[][] results = new char[][] { 702 /* number of bytes read, code point */ 703 { '\u0061', }, 704 { '\u0080', }, 705 { '\u0800', }, 706 { '\ud800', '\udc00', }, // 10000 707 { '\udbd0', '\udf21', }, // 104321 708 { '\ud801', '\udc01', }, // 10401 709 }; 710 711 /* error test input */ 712 byte[][] in2 = new byte[][] { 713 { (byte)0x61, }, 714 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 715 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 716 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 717 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 718 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 719 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 720 (byte)0xfe, /* illegal byte altogether */ 721 (byte)0x62, }, 722 }; 723 724 /* expected error test results */ 725 char[][] results2 = new char[][] { 726 /* number of bytes read, code point */ 727 { '\u0062', }, 728 { '\u0062', }, 729 }; 730 731 String converter = "UTF-8"; 732 CharsetProvider icu = new CharsetProviderICU(); 733 Charset icuChar = icu.charsetForName(converter); 734 CharsetDecoder decoder = icuChar.newDecoder(); 735 736 int i; 737 try { 738 for (i = 0; i < in.length; i++) { 739 ByteBuffer source = ByteBuffer.wrap(in[i]); 740 CharBuffer expected = CharBuffer.wrap(results[i]); 741 smBufDecode(decoder, converter, source, expected, true, false, 742 true); 743 smBufDecode(decoder, converter, source, expected, true, false, 744 false); 745 } 746 } catch (Exception ex) { 747 errln("Incorrect result in " + converter); 748 } 749 try { 750 for (i = 0; i < in2.length; i++) { 751 ByteBuffer source = ByteBuffer.wrap(in2[i]); 752 CharBuffer expected = CharBuffer.wrap(results2[i]); 753 decoder.onMalformedInput(CodingErrorAction.IGNORE); 754 smBufDecode(decoder, converter, source, expected, true, false, 755 true); 756 smBufDecode(decoder, converter, source, expected, true, false, 757 false); 758 } 759 } catch (Exception ex) { 760 errln("Incorrect result in " + converter); 761 } 762 } 763 764 @Test TestSurrogateBehavior()765 public void TestSurrogateBehavior() { 766 CharsetProviderICU icu = new CharsetProviderICU(); 767 768 // get all the converters into an array 769 Object[] converters = CharsetProviderICU.getAvailableNames(); 770 771 String norm = "a"; 772 String ext = "\u0275"; // theta 773 String lead = "\ud835"; 774 String trail = "\udd04"; 775 // lead + trail = \U1d504 (fraktur capital A) 776 777 String input = 778 // error position 779 ext // unmap(1) 1 780 + lead // under 1 781 + lead // malf(1) 2 782 + trail // unmap(2) 4 783 + trail // malf(1) 5 784 + ext // unmap(1) 6 785 + norm // unmap(1) 7 786 ; 787 CoderResult[] results = new CoderResult[] { 788 CoderResult.unmappableForLength(1), // or underflow 789 CoderResult.UNDERFLOW, 790 CoderResult.malformedForLength(1), 791 CoderResult.unmappableForLength(2), // or underflow 792 CoderResult.malformedForLength(1), 793 CoderResult.unmappableForLength(1), // or underflow 794 CoderResult.unmappableForLength(1), // or underflow 795 }; 796 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 797 int n = positions.length; 798 799 int badcount = 0; 800 int goodcount = 0; 801 int[] uhohindices = new int[n]; 802 int[] badposindices = new int[n]; 803 int[] malfindices = new int[n]; 804 int[] unmapindices = new int[n]; 805 ArrayList pass = new ArrayList(); 806 ArrayList exempt = new ArrayList(); 807 808 outer: for (int conv=0; conv<converters.length; conv++) { 809 String converter = (String)converters[conv]; 810 if (converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 811 converter.equals("x-LMBCS-1")) { 812 exempt.add(converter); 813 continue; 814 } 815 816 boolean currentlybad = false; 817 Charset icuChar = icu.charsetForName(converter); 818 CharsetEncoder encoder = icuChar.newEncoder(); 819 CoderResult cr; 820 821 CharBuffer source = CharBuffer.wrap(input); 822 ByteBuffer target = ByteBuffer.allocate(30); 823 ByteBuffer expected = null; 824 try { 825 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 826 encoder.onMalformedInput(CodingErrorAction.IGNORE); 827 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 828 encoder.reset(); 829 } catch (CharacterCodingException ex) { 830 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 831 return; 832 } catch (RuntimeException ex) { 833 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 834 errln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 835 continue outer; 836 } 837 838 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 839 encoder.onMalformedInput(CodingErrorAction.REPORT); 840 for (int i=0; i<n; i++) { 841 source.limit(i+1); 842 cr = encoder.encode(source, target, i == n - 1); 843 if (!(equals(cr, results[i]) 844 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 845 )) { 846 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 847 if (results[i].isMalformed() && cr.isMalformed()) { 848 malfindices[i]++; 849 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 850 unmapindices[i]++; 851 } else { 852 uhohindices[i]++; 853 } 854 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 855 } 856 if (source.position() != positions[i]) { 857 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 858 badposindices[i]++; 859 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 860 } 861 862 } 863 encoder.reset(); 864 865 //System.out.println("\n" + hex(target.array())); 866 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 867 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 868 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 869 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 870 } 871 872 if (!currentlybad) { 873 goodcount++; 874 pass.add(converter); 875 } 876 } 877 878 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 879 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 880 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 881 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 882 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); logln(""); 883 log("uhoh \t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 884 logln(""); 885 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 886 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 887 } 888 889 // public void TestCharsetCallback() { 890 // String currentTest = "initialization"; 891 // try { 892 // Class[] params; 893 // 894 // // get the classes 895 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 896 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 897 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 898 // 899 // // set up encoderCall 900 // params = new Class[] {CharsetEncoderICU.class, Object.class, 901 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 902 // char[].class, int.class, int.class, CoderResult.class }; 903 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 904 // 905 // // set up decoderCall 906 // params = new Class[] {CharsetDecoderICU.class, Object.class, 907 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 908 // char[].class, int.class, CoderResult.class}; 909 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 910 // 911 // // get relevant fields 912 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 913 // 914 // // set up a few arguments 915 // CharsetProvider provider = new CharsetProviderICU(); 916 // Charset charset = provider.charsetForName("UTF-8"); 917 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 918 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 919 // CharBuffer chars = CharBuffer.allocate(10); 920 // chars.put('o'); 921 // chars.put('k'); 922 // ByteBuffer bytes = ByteBuffer.allocate(10); 923 // bytes.put((byte)'o'); 924 // bytes.put((byte)'k'); 925 // IntBuffer offsets = IntBuffer.allocate(10); 926 // offsets.put(0); 927 // offsets.put(1); 928 // char[] buffer = null; 929 // Integer length = new Integer(2); 930 // Integer cp = new Integer(0); 931 // CoderResult unmap = CoderResult.unmappableForLength(2); 932 // CoderResult malf = CoderResult.malformedForLength(2); 933 // CoderResult under = CoderResult.UNDERFLOW; 934 // 935 // // set up error arrays 936 // Integer invalidCharLength = new Integer(1); 937 // Byte subChar1 = new Byte((byte)0); 938 // Byte subChar1_alternate = new Byte((byte)1); // for TO_U_CALLBACK_SUBSTITUTE 939 // 940 // // set up chars and bytes backups and expected values for certain cases 941 // CharBuffer charsBackup = bufferCopy(chars); 942 // ByteBuffer bytesBackup = bufferCopy(bytes); 943 // IntBuffer offsetsBackup = bufferCopy(offsets); 944 // CharBuffer encoderCharsExpected = bufferCopy(chars); 945 // ByteBuffer encoderBytesExpected = bufferCopy(bytes); 946 // IntBuffer encoderOffsetsExpected = bufferCopy(offsets); 947 // CharBuffer decoderCharsExpected1 = bufferCopy(chars); 948 // CharBuffer decoderCharsExpected2 = bufferCopy(chars); 949 // IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets); 950 // IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets); 951 // 952 // // initialize fields to obtain expected data 953 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 954 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1); 955 // 956 // // run cbFromUWriteSub 957 // Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class}); 958 // cbFromUWriteSub.setAccessible(true); 959 // CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected}); 960 // 961 // // run toUWriteUChars with normal data 962 // Method toUWriteUChars = CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class}); 963 // toUWriteUChars.setAccessible(true); 964 // CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, new Integer(0), new Integer(1), decoderCharsExpected1, decoderOffsetsExpected1, new Integer(bytes.position())}); 965 // 966 // // reset certain fields 967 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 968 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate); 969 // 970 // // run toUWriteUChars again 971 // CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, new Integer(0), new Integer(1), decoderCharsExpected2, decoderOffsetsExpected2, new Integer(bytes.position())}); 972 // 973 // // begin creating the tests array 974 // ArrayList tests = new ArrayList(); 975 // 976 // // create tests for FROM_U_CALLBACK_SKIP 0 977 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 978 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 979 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 980 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 981 // 982 // // create tests for TO_U_CALLBACK_SKIP 4 983 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 984 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 985 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 986 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 987 // 988 // // create tests for FROM_U_CALLBACK_STOP 8 989 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 990 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 991 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 992 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 993 // 994 // // create tests for TO_U_CALLBACK_STOP 12 995 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 996 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 997 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 998 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 999 // 1000 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 1001 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1002 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1003 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1004 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1005 // 1006 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1007 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1008 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1009 // 1010 // Iterator iter = tests.iterator(); 1011 // for (int i=0; iter.hasNext(); i++) { 1012 // // get the data out of the map 1013 // Object[] next = (Object[])iter.next(); 1014 // 1015 // Method method = (Method)next[0]; 1016 // String fieldName = (String)next[1]; 1017 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1018 // Object[] args = (Object[])next[2]; 1019 // CoderResult expected = (CoderResult)next[3]; 1020 // CharBuffer charsExpected = (CharBuffer)next[4]; 1021 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1022 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1023 // 1024 // // set up error arrays and certain fields 1025 // Object[] values = (Object[])next[7]; 1026 // if (method == decoderCall) { 1027 // decoder.reset(); 1028 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1029 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1030 // } else if (method == encoderCall) { 1031 // encoder.reset(); 1032 // } 1033 // 1034 // try { 1035 // // invoke the method 1036 // CoderResult actual = (CoderResult)method.invoke(field, args); 1037 // 1038 // // if expected != actual 1039 // if (!coderResultsEqual(expected, actual)) { 1040 // // case #i refers to the index in the arraylist tests 1041 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1042 // } 1043 // // if the expected buffers != actual buffers 1044 // else if (!(buffersEqual(chars, charsExpected) && 1045 // buffersEqual(bytes, bytesExpected) && 1046 // buffersEqual(offsets, offsetsExpected))) { 1047 // // case #i refers to the index in the arraylist tests 1048 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1049 // } 1050 // } catch (InvocationTargetException ex) { 1051 // // case #i refers to the index in the arraylist tests 1052 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1053 // //ex.getCause().printStackTrace(); 1054 // } 1055 // 1056 // // reset the buffers 1057 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1058 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1059 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1060 // bytes.position(bytesBackup.position()); 1061 // chars.position(charsBackup.position()); 1062 // offsets.position(offsetsBackup.position()); 1063 // } 1064 // 1065 // } catch (Exception ex) { 1066 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1067 // ex.printStackTrace(); 1068 // } 1069 // } 1070 // 1071 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1072 // Field field = c.getDeclaredField(name); 1073 // field.setAccessible(true); 1074 // return field.get(instance); 1075 // } 1076 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1077 // Field field = c.getDeclaredField(name); 1078 // field.setAccessible(true); 1079 // if (value instanceof Boolean) 1080 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1081 // else if (value instanceof Byte) 1082 // field.setByte(instance, ((Byte)value).byteValue()); 1083 // else if (value instanceof Character) 1084 // field.setChar(instance, ((Character)value).charValue()); 1085 // else if (value instanceof Double) 1086 // field.setDouble(instance, ((Double)value).doubleValue()); 1087 // else if (value instanceof Float) 1088 // field.setFloat(instance, ((Float)value).floatValue()); 1089 // else if (value instanceof Integer) 1090 // field.setInt(instance, ((Integer)value).intValue()); 1091 // else if (value instanceof Long) 1092 // field.setLong(instance, ((Long)value).longValue()); 1093 // else if (value instanceof Short) 1094 // field.setShort(instance, ((Short)value).shortValue()); 1095 // else 1096 // field.set(instance, value); 1097 // } 1098 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1099 // if (a == null && b == null) 1100 // return true; 1101 // if (a == null || b == null) 1102 // return false; 1103 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1104 // return true; 1105 // if (a.length() != b.length()) 1106 // return false; 1107 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1108 // return true; 1109 // return false; 1110 // } 1111 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1112 // if (a.position() != b.position()) 1113 // return false; 1114 // int limit = a.position(); 1115 // for (int i=0; i<limit; i++) 1116 // if (a.get(i) != b.get(i)) 1117 // return false; 1118 // return true; 1119 // } 1120 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1121 // if (a.position() != b.position()) 1122 // return false; 1123 // int limit = a.position(); 1124 // for (int i=0; i<limit; i++) 1125 // if (a.get(i) != b.get(i)) 1126 // return false; 1127 // return true; 1128 // } 1129 // private boolean buffersEqual(IntBuffer a, IntBuffer b) { 1130 // if (a.position() != b.position()) 1131 // return false; 1132 // int limit = a.position(); 1133 // for (int i=0; i<limit; i++) 1134 // if (a.get(i) != b.get(i)) 1135 // return false; 1136 // return true; 1137 // } 1138 // private ByteBuffer bufferCopy(ByteBuffer src) { 1139 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1140 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1141 // dest.position(src.position()); 1142 // return dest; 1143 // } 1144 // private CharBuffer bufferCopy(CharBuffer src) { 1145 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1146 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1147 // dest.position(src.position()); 1148 // return dest; 1149 // } 1150 // private IntBuffer bufferCopy(IntBuffer src) { 1151 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1152 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1153 // dest.position(src.position()); 1154 // return dest; 1155 // } 1156 1157 1158 @Test TestAPISemantics( )1159 public void TestAPISemantics(/*String encoding*/) { 1160 String encoding = "UTF-16"; 1161 CharsetDecoder decoder = null; 1162 CharsetEncoder encoder = null; 1163 try { 1164 CharsetProviderICU provider = new CharsetProviderICU(); 1165 Charset charset = provider.charsetForName(encoding); 1166 decoder = charset.newDecoder(); 1167 encoder = charset.newEncoder(); 1168 } catch(MissingResourceException ex) { 1169 warnln("Could not load charset data: " + encoding); 1170 return; 1171 } 1172 1173 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1174 final byte[] byteStr = { 1175 (byte) 0x00,(byte) 'a', 1176 (byte) 0x00,(byte) 'b', 1177 (byte) 0x00,(byte) 'c', 1178 (byte) 0x00,(byte) 'd', 1179 (byte) 0xd8,(byte) 0x00, 1180 (byte) 0xdc,(byte) 0x00, 1181 (byte) 0x12,(byte) 0x34, 1182 (byte) 0x00,(byte) 0xa5, 1183 (byte) 0x30,(byte) 0x00, 1184 (byte) 0x00,(byte) 0x0d, 1185 (byte) 0x00,(byte) 0x0a 1186 }; 1187 final byte[] expectedByteStr = { 1188 (byte) 0xfe,(byte) 0xff, 1189 (byte) 0x00,(byte) 'a', 1190 (byte) 0x00,(byte) 'b', 1191 (byte) 0x00,(byte) 'c', 1192 (byte) 0x00,(byte) 'd', 1193 (byte) 0xd8,(byte) 0x00, 1194 (byte) 0xdc,(byte) 0x00, 1195 (byte) 0x12,(byte) 0x34, 1196 (byte) 0x00,(byte) 0xa5, 1197 (byte) 0x30,(byte) 0x00, 1198 (byte) 0x00,(byte) 0x0d, 1199 (byte) 0x00,(byte) 0x0a 1200 }; 1201 1202 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1203 CharBuffer uniVal = CharBuffer.wrap(unistr); 1204 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1205 1206 int rc = 0; 1207 if(decoder==null){ 1208 warnln("Could not load decoder."); 1209 return; 1210 } 1211 decoder.reset(); 1212 /* Convert the whole buffer to Unicode */ 1213 try { 1214 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1215 CoderResult result = decoder.decode(byes, chars, false); 1216 1217 if (result.isError()) { 1218 errln("ToChars encountered Error"); 1219 rc = 1; 1220 } 1221 if (result.isOverflow()) { 1222 errln("ToChars encountered overflow exception"); 1223 rc = 1; 1224 } 1225 if (!equals(chars, unistr)) { 1226 errln("ToChars does not match"); 1227 printchars(chars); 1228 errln("Expected : "); 1229 printchars(unistr); 1230 rc = 2; 1231 } 1232 1233 } catch (Exception e) { 1234 errln("ToChars - exception in buffer"); 1235 rc = 5; 1236 } 1237 1238 /* Convert single bytes to Unicode */ 1239 try { 1240 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1241 ByteBuffer b = ByteBuffer.wrap(byteStr); 1242 decoder.reset(); 1243 CoderResult result=null; 1244 for (int i = 1; i <= byteStr.length; i++) { 1245 b.limit(i); 1246 result = decoder.decode(b, chars, false); 1247 if(result.isOverflow()){ 1248 errln("ToChars single threw an overflow exception"); 1249 } 1250 if (result.isError()) { 1251 errln("ToChars single the result is an error "+result.toString()); 1252 } 1253 } 1254 if (unistr.length() != (chars.limit())) { 1255 errln("ToChars single len does not match"); 1256 rc = 3; 1257 } 1258 if (!equals(chars, unistr)) { 1259 errln("ToChars single does not match"); 1260 printchars(chars); 1261 rc = 4; 1262 } 1263 } catch (Exception e) { 1264 errln("ToChars - exception in single"); 1265 //e.printStackTrace(); 1266 rc = 6; 1267 } 1268 1269 /* Convert the buffer one at a time to Unicode */ 1270 try { 1271 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1272 decoder.reset(); 1273 byes.rewind(); 1274 for (int i = 1; i <= byteStr.length; i++) { 1275 byes.limit(i); 1276 CoderResult result = decoder.decode(byes, chars, false); 1277 if (result.isError()) { 1278 errln("Error while decoding: "+result.toString()); 1279 } 1280 if(result.isOverflow()){ 1281 errln("ToChars Simple threw an overflow exception"); 1282 } 1283 } 1284 if (chars.limit() != unistr.length()) { 1285 errln("ToChars Simple buffer len does not match"); 1286 rc = 7; 1287 } 1288 if (!equals(chars, unistr)) { 1289 errln("ToChars Simple buffer does not match"); 1290 printchars(chars); 1291 err(" Expected : "); 1292 printchars(unistr); 1293 rc = 8; 1294 } 1295 } catch (Exception e) { 1296 errln("ToChars - exception in single buffer"); 1297 //e.printStackTrace(System.err); 1298 rc = 9; 1299 } 1300 if (rc != 0) { 1301 errln("Test Simple ToChars for encoding : FAILED"); 1302 } 1303 1304 rc = 0; 1305 /* Convert the whole buffer from unicode */ 1306 try { 1307 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1308 encoder.reset(); 1309 CoderResult result = encoder.encode(uniVal, bytes, false); 1310 if (result.isError()) { 1311 errln("FromChars reported error: " + result.toString()); 1312 rc = 1; 1313 } 1314 if(result.isOverflow()){ 1315 errln("FromChars threw an overflow exception"); 1316 } 1317 bytes.position(0); 1318 if (!bytes.equals(expected)) { 1319 errln("FromChars does not match"); 1320 printbytes(bytes); 1321 printbytes(expected); 1322 rc = 2; 1323 } 1324 } catch (Exception e) { 1325 errln("FromChars - exception in buffer"); 1326 //e.printStackTrace(System.err); 1327 rc = 5; 1328 } 1329 1330 /* Convert the buffer one char at a time to unicode */ 1331 try { 1332 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1333 CharBuffer c = CharBuffer.wrap(unistr); 1334 encoder.reset(); 1335 CoderResult result= null; 1336 for (int i = 1; i <= unistr.length(); i++) { 1337 c.limit(i); 1338 result = encoder.encode(c, bytes, false); 1339 if(result.isOverflow()){ 1340 errln("FromChars single threw an overflow exception"); 1341 } 1342 if(result.isError()){ 1343 errln("FromChars single threw an error: "+ result.toString()); 1344 } 1345 } 1346 if (expectedByteStr.length != bytes.limit()) { 1347 errln("FromChars single len does not match"); 1348 rc = 3; 1349 } 1350 1351 bytes.position(0); 1352 if (!bytes.equals(expected)) { 1353 errln("FromChars single does not match"); 1354 printbytes(bytes); 1355 printbytes(expected); 1356 rc = 4; 1357 } 1358 1359 } catch (Exception e) { 1360 errln("FromChars - exception in single"); 1361 //e.printStackTrace(System.err); 1362 rc = 6; 1363 } 1364 1365 /* Convert one char at a time to unicode */ 1366 try { 1367 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1368 encoder.reset(); 1369 char[] temp = unistr.toCharArray(); 1370 CoderResult result=null; 1371 for (int i = 0; i <= temp.length; i++) { 1372 uniVal.limit(i); 1373 result = encoder.encode(uniVal, bytes, false); 1374 if(result.isOverflow()){ 1375 errln("FromChars simple threw an overflow exception"); 1376 } 1377 if(result.isError()){ 1378 errln("FromChars simple threw an error: "+ result.toString()); 1379 } 1380 } 1381 if (bytes.limit() != expectedByteStr.length) { 1382 errln("FromChars Simple len does not match"); 1383 rc = 7; 1384 } 1385 if (!bytes.equals(byes)) { 1386 errln("FromChars Simple does not match"); 1387 printbytes(bytes); 1388 printbytes(byes); 1389 rc = 8; 1390 } 1391 } catch (Exception e) { 1392 errln("FromChars - exception in single buffer"); 1393 //e.printStackTrace(System.err); 1394 rc = 9; 1395 } 1396 if (rc != 0) { 1397 errln("Test Simple FromChars " + encoding + " --FAILED"); 1398 } 1399 } 1400 printchars(CharBuffer buf)1401 void printchars(CharBuffer buf) { 1402 int i; 1403 char[] chars = new char[buf.limit()]; 1404 //save the current position 1405 int pos = buf.position(); 1406 buf.position(0); 1407 buf.get(chars); 1408 //reset to old position 1409 buf.position(pos); 1410 for (i = 0; i < chars.length; i++) { 1411 err(hex(chars[i]) + " "); 1412 } 1413 errln(""); 1414 } printchars(String str)1415 void printchars(String str) { 1416 char[] chars = str.toCharArray(); 1417 for (int i = 0; i < chars.length; i++) { 1418 err(hex(chars[i]) + " "); 1419 } 1420 errln(""); 1421 } printbytes(ByteBuffer buf)1422 void printbytes(ByteBuffer buf) { 1423 int i; 1424 byte[] bytes = new byte[buf.limit()]; 1425 //save the current position 1426 int pos = buf.position(); 1427 buf.position(0); 1428 buf.get(bytes); 1429 //reset to old position 1430 buf.position(pos); 1431 for (i = 0; i < bytes.length; i++) { 1432 System.out.print(hex(bytes[i]) + " "); 1433 } 1434 errln(""); 1435 } 1436 equals(CoderResult a, CoderResult b)1437 public boolean equals(CoderResult a, CoderResult b) { 1438 return (a.isUnderflow() && b.isUnderflow()) 1439 || (a.isOverflow() && b.isOverflow()) 1440 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1441 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1442 } equals(CharBuffer buf, String str)1443 public boolean equals(CharBuffer buf, String str) { 1444 return equals(buf, str.toCharArray()); 1445 } equals(CharBuffer buf, CharBuffer str)1446 public boolean equals(CharBuffer buf, CharBuffer str) { 1447 if (buf.limit() != str.limit()) 1448 return false; 1449 int limit = buf.limit(); 1450 for (int i = 0; i < limit; i++) 1451 if (buf.get(i) != str.get(i)) 1452 return false; 1453 return true; 1454 } equals(CharBuffer buf, CharBuffer str, int limit)1455 public boolean equals(CharBuffer buf, CharBuffer str, int limit) { 1456 if (limit > buf.limit() || limit > str.limit()) 1457 return false; 1458 for (int i = 0; i < limit; i++) 1459 if (buf.get(i) != str.get(i)) 1460 return false; 1461 return true; 1462 } equals(CharBuffer buf, char[] compareTo)1463 public boolean equals(CharBuffer buf, char[] compareTo) { 1464 char[] chars = new char[buf.limit()]; 1465 //save the current position 1466 int pos = buf.position(); 1467 buf.position(0); 1468 buf.get(chars); 1469 //reset to old position 1470 buf.position(pos); 1471 return equals(chars, compareTo); 1472 } 1473 equals(char[] chars, char[] compareTo)1474 public boolean equals(char[] chars, char[] compareTo) { 1475 if (chars.length != compareTo.length) { 1476 errln( 1477 "Length does not match chars: " 1478 + chars.length 1479 + " compareTo: " 1480 + compareTo.length); 1481 return false; 1482 } else { 1483 boolean result = true; 1484 for (int i = 0; i < chars.length; i++) { 1485 if (chars[i] != compareTo[i]) { 1486 logln( 1487 "Got: " 1488 + hex(chars[i]) 1489 + " Expected: " 1490 + hex(compareTo[i]) 1491 + " At: " 1492 + i); 1493 result = false; 1494 } 1495 } 1496 return result; 1497 } 1498 } 1499 equals(ByteBuffer buf, byte[] compareTo)1500 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1501 byte[] chars = new byte[buf.limit()]; 1502 //save the current position 1503 int pos = buf.position(); 1504 buf.position(0); 1505 buf.get(chars); 1506 //reset to old position 1507 buf.position(pos); 1508 return equals(chars, compareTo); 1509 } equals(ByteBuffer buf, ByteBuffer compareTo)1510 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1511 if (buf.limit() != compareTo.limit()) 1512 return false; 1513 int limit = buf.limit(); 1514 for (int i = 0; i < limit; i++) 1515 if (buf.get(i) != compareTo.get(i)) 1516 return false; 1517 return true; 1518 } equals(ByteBuffer buf, ByteBuffer compareTo, int limit)1519 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) { 1520 if (limit > buf.limit() || limit > compareTo.limit()) 1521 return false; 1522 for (int i = 0; i < limit; i++) 1523 if (buf.get(i) != compareTo.get(i)) 1524 return false; 1525 return true; 1526 } equals(byte[] chars, byte[] compareTo)1527 public boolean equals(byte[] chars, byte[] compareTo) { 1528 if (false/*chars.length != compareTo.length*/) { 1529 errln( 1530 "Length does not match chars: " 1531 + chars.length 1532 + " compareTo: " 1533 + compareTo.length); 1534 return false; 1535 } else { 1536 boolean result = true; 1537 for (int i = 0; i < chars.length; i++) { 1538 if (chars[i] != compareTo[i]) { 1539 logln( 1540 "Got: " 1541 + hex(chars[i]) 1542 + " Expected: " 1543 + hex(compareTo[i]) 1544 + " At: " 1545 + i); 1546 result = false; 1547 } 1548 } 1549 return result; 1550 } 1551 } 1552 1553 // TODO 1554 /* 1555 @Test 1556 public void TestCallback(String encoding) throws Exception { 1557 1558 byte[] gbSource = 1559 { 1560 (byte) 0x81, 1561 (byte) 0x36, 1562 (byte) 0xDE, 1563 (byte) 0x36, 1564 (byte) 0x81, 1565 (byte) 0x36, 1566 (byte) 0xDE, 1567 (byte) 0x37, 1568 (byte) 0x81, 1569 (byte) 0x36, 1570 (byte) 0xDE, 1571 (byte) 0x38, 1572 (byte) 0xe3, 1573 (byte) 0x32, 1574 (byte) 0x9a, 1575 (byte) 0x36 }; 1576 1577 char[] subChars = { 'P', 'I' }; 1578 1579 decoder.reset(); 1580 1581 decoder.replaceWith(new String(subChars)); 1582 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1583 CharBuffer myTarget = CharBuffer.allocate(5); 1584 1585 decoder.decode(mySource, myTarget, true); 1586 char[] expectedResult = 1587 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1588 1589 if (!equals(myTarget, new String(expectedResult))) { 1590 errln("Test callback GB18030 to Unicode : FAILED"); 1591 } 1592 1593 } 1594 */ 1595 1596 @Test TestCanConvert( )1597 public void TestCanConvert(/*String encoding*/)throws Exception { 1598 char[] mySource = { 1599 '\ud800', '\udc00',/*surrogate pair */ 1600 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1601 '\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1602 '\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1603 '\ud800','\udc00',/*surrogate pair */ 1604 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1605 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1606 }; 1607 String encoding = "UTF-16"; 1608 CharsetEncoder encoder = null; 1609 try { 1610 CharsetProviderICU provider = new CharsetProviderICU(); 1611 Charset charset = provider.charsetForName(encoding); 1612 encoder = charset.newEncoder(); 1613 } catch(MissingResourceException ex) { 1614 warnln("Could not load charset data: " + encoding); 1615 return; 1616 } 1617 if (!encoder.canEncode(new String(mySource))) { 1618 errln("Test canConvert() " + encoding + " failed. "+encoder); 1619 } 1620 1621 } 1622 1623 @Test TestAvailableCharsets()1624 public void TestAvailableCharsets() { 1625 SortedMap map = Charset.availableCharsets(); 1626 Set keySet = map.keySet(); 1627 Iterator iter = keySet.iterator(); 1628 while(iter.hasNext()){ 1629 logln("Charset name: "+iter.next().toString()); 1630 } 1631 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1632 int mapSize = map.size(); 1633 if(mapSize < charsets.length){ 1634 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1635 + " JDK: " + mapSize); 1636 } 1637 logln("Total Number of chasets = " + map.size()); 1638 } 1639 1640 @Test TestWindows936()1641 public void TestWindows936(){ 1642 CharsetProviderICU icu = new CharsetProviderICU(); 1643 Charset cs = icu.charsetForName("windows-936-2000"); 1644 String canonicalName = cs.name(); 1645 if(!canonicalName.equals("GBK")){ 1646 errln("Did not get the expected canonical name. Got: "+canonicalName); //get the canonical name 1647 } 1648 } 1649 1650 @Test TestICUAvailableCharsets()1651 public void TestICUAvailableCharsets() { 1652 CharsetProviderICU icu = new CharsetProviderICU(); 1653 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1654 for(int i=0;i<charsets.length;i++){ 1655 Charset cs = icu.charsetForName((String)charsets[i]); 1656 try{ 1657 CharsetEncoder encoder = cs.newEncoder(); 1658 if(encoder!=null){ 1659 logln("Creation of encoder succeeded. "+cs.toString()); 1660 } 1661 }catch(Exception ex){ 1662 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1663 } 1664 try{ 1665 CharsetDecoder decoder = cs.newDecoder(); 1666 if(decoder!=null){ 1667 logln("Creation of decoder succeeded. "+cs.toString()); 1668 } 1669 }catch(Exception ex){ 1670 errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString()); 1671 } 1672 } 1673 } 1674 1675 /* jitterbug 4312 */ 1676 @Test TestUnsupportedCharset()1677 public void TestUnsupportedCharset(){ 1678 CharsetProvider icu = new CharsetProviderICU(); 1679 Charset icuChar = icu.charsetForName("impossible"); 1680 if(icuChar != null){ 1681 errln("ICU does not conform to the spec"); 1682 } 1683 } 1684 1685 @Test TestEncoderCreation()1686 public void TestEncoderCreation(){ 1687 // Use CharsetICU.forNameICU() so that we get the ICU version 1688 // even if the system or another provider also supports this charset. 1689 String encoding = "GB_2312-80"; 1690 try{ 1691 Charset cs = CharsetICU.forNameICU(encoding); 1692 CharsetEncoder enc = cs.newEncoder(); 1693 if(enc!=null){ 1694 logln("Successfully created an encoder for " + encoding + ": " + enc); 1695 if(!(enc instanceof CharsetEncoderICU)) { 1696 errln("Expected " + encoding + 1697 " to be implemented by ICU but got an instance of " + enc.getClass()); 1698 } 1699 }else{ 1700 errln("Error creating charset encoder for " + encoding); 1701 } 1702 }catch(Exception e){ 1703 warnln("Error creating charset encoder for " + encoding + ": " + e); 1704 } 1705 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1706 encoding = "x-ibm-971_P100-1995"; 1707 try{ 1708 Charset cs = Charset.forName(encoding); 1709 CharsetEncoder enc = cs.newEncoder(); 1710 if(enc!=null){ 1711 logln("Successfully created an encoder for " + encoding + ": " + enc + 1712 " which is implemented by ICU? " + (enc instanceof CharsetEncoderICU)); 1713 }else{ 1714 errln("Error creating charset encoder for " + encoding); 1715 } 1716 }catch(Exception e){ 1717 warnln("Error creating charset encoder for " + encoding + ": " + e); 1718 } 1719 } 1720 1721 @Test TestSubBytes()1722 public void TestSubBytes(){ 1723 try{ 1724 //create utf-8 decoder 1725 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1726 1727 //create a valid byte array, which can be decoded to " buffer" 1728 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1729 1730 ByteBuffer buffer = ByteBuffer.allocate(20); 1731 1732 //add a evil byte to make the byte buffer be malformed input 1733 buffer.put((byte)0xd8); 1734 1735 //put the valid byte array 1736 buffer.put(unibytes); 1737 1738 //reset position 1739 buffer.flip(); 1740 1741 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1742 CharBuffer out = decoder.decode(buffer); 1743 String expected = "\ufffd buffer"; 1744 if(!expected.equals(new String(out.array()))){ 1745 errln("Did not get the expected result for substitution chars. Got: "+ 1746 new String(out.array()) + "("+ hex(out.array())+")"); 1747 } 1748 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1749 }catch (CharacterCodingException ex){ 1750 errln("Unexpected exception: "+ex.toString()); 1751 } 1752 } 1753 /* 1754 1755 @Test 1756 public void TestImplFlushFailure(){ 1757 1758 try{ 1759 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1760 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1761 ByteBuffer out = ByteBuffer.allocate(30); 1762 encoder.encode(in, out, true); 1763 encoder.flush(out); 1764 if(out.position()!= 20){ 1765 errln("Did not get the expected position from flush"); 1766 } 1767 1768 }catch (Exception ex){ 1769 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1770 } 1771 } 1772 */ 1773 1774 @Test TestISO88591()1775 public void TestISO88591() { 1776 1777 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1778 if(cs!=null){ 1779 CharsetEncoder encoder = cs.newEncoder(); 1780 if(encoder!=null){ 1781 encoder.canEncode("\uc2a3"); 1782 }else{ 1783 errln("Could not create encoder for iso-8859-1"); 1784 } 1785 }else{ 1786 errln("Could not create Charset for iso-8859-1"); 1787 } 1788 1789 } 1790 1791 @Test TestUTF8Encode()1792 public void TestUTF8Encode() { 1793 // Test with a lead surrogate in the middle of the input text. 1794 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 1795 CharBuffer in = CharBuffer.wrap("\ud800a"); 1796 ByteBuffer out = ByteBuffer.allocate(30); 1797 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1798 CoderResult result = encoderICU.encode(in, out, true); 1799 1800 if (result.isMalformed()) { 1801 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1802 } else if (result.isUnderflow()) { 1803 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1804 } 1805 1806 in.position(0); 1807 out.clear(); 1808 1809 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1810 result = encoderJDK.encode(in, out, true); 1811 if (result.isMalformed()) { 1812 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1813 } else if (result.isUnderflow()) { 1814 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1815 } 1816 } 1817 1818 /* private void printCB(CharBuffer buf){ 1819 buf.rewind(); 1820 while(buf.hasRemaining()){ 1821 System.out.println(hex(buf.get())); 1822 } 1823 buf.rewind(); 1824 } 1825 */ 1826 1827 @Test TestUTF8()1828 public void TestUTF8() throws CharacterCodingException{ 1829 try{ 1830 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1831 encoderICU.encode(CharBuffer.wrap("\ud800")); 1832 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1833 }catch (Exception e) { 1834 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1835 //e.printStackTrace(); 1836 } 1837 1838 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1839 try { 1840 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1841 errln("\\ud800 is OK for JDK utf-8 encoder"); 1842 } catch (Exception e) { 1843 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1844 //e.printStackTrace(); 1845 } 1846 } 1847 1848 @Test TestUTF16Bom()1849 public void TestUTF16Bom(){ 1850 1851 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1852 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1853 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 }; 1854 CharBuffer inBuf = CharBuffer.allocate(in.length); 1855 inBuf.put(in); 1856 CharsetEncoder encoder = cs.newEncoder(); 1857 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1858 inBuf.rewind(); 1859 encoder.encode(inBuf, outBuf, true); 1860 outBuf.rewind(); 1861 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1862 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1863 } 1864 while(outBuf.hasRemaining()){ 1865 logln("0x"+hex(outBuf.get())); 1866 } 1867 CharsetDecoder decoder = cs.newDecoder(); 1868 outBuf.rewind(); 1869 CharBuffer rt = CharBuffer.allocate(in.length); 1870 CoderResult cr = decoder.decode(outBuf, rt, true); 1871 if(cr.isError()){ 1872 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1873 } 1874 equals(rt, in); 1875 { 1876 rt.clear(); 1877 outBuf.rewind(); 1878 Charset utf16 = Charset.forName("UTF-16"); 1879 CharsetDecoder dc = utf16.newDecoder(); 1880 cr = dc.decode(outBuf, rt, true); 1881 equals(rt, in); 1882 } 1883 } 1884 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush)1885 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1886 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1887 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1888 } 1889 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray)1890 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1891 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1892 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1893 } 1894 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)1895 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1896 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) 1897 throws BufferOverflowException, Exception { 1898 ByteBuffer mySource; 1899 CharBuffer myTarget; 1900 if (backedByArray) { 1901 mySource = ByteBuffer.allocate(source.capacity()); 1902 myTarget = CharBuffer.allocate(target.capacity()); 1903 } else { 1904 // this does not guarantee by any means that mySource and myTarget 1905 // are not backed by arrays 1906 mySource = ByteBuffer.allocateDirect(source.capacity()); 1907 myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer(); 1908 } 1909 mySource.position(source.position()); 1910 for (int i = source.position(); i < source.limit(); i++) 1911 mySource.put(i, source.get(i)); 1912 1913 { 1914 decoder.reset(); 1915 myTarget.limit(target.limit()); 1916 mySource.limit(source.limit()); 1917 mySource.position(source.position()); 1918 CoderResult result = CoderResult.UNDERFLOW; 1919 result = decoder.decode(mySource, myTarget, true); 1920 if (flush) { 1921 result = decoder.flush(myTarget); 1922 } 1923 if (result.isError()) { 1924 if (throwException) { 1925 throw new Exception(); 1926 } 1927 errln("Test complete buffers while decoding failed. " + result.toString()); 1928 return; 1929 } 1930 if (result.isOverflow()) { 1931 if (throwException) { 1932 throw new BufferOverflowException(); 1933 } 1934 errln("Test complete buffers while decoding threw overflow exception"); 1935 return; 1936 } 1937 myTarget.limit(myTarget.position()); 1938 myTarget.position(0); 1939 target.position(0); 1940 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1941 errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed"); 1942 } 1943 } 1944 if (isQuick()) { 1945 return; 1946 } 1947 { 1948 decoder.reset(); 1949 myTarget.limit(target.position()); 1950 mySource.limit(source.position()); 1951 mySource.position(source.position()); 1952 myTarget.clear(); 1953 myTarget.position(0); 1954 1955 int inputLen = mySource.remaining(); 1956 1957 CoderResult result = CoderResult.UNDERFLOW; 1958 for (int i = 1; i <= inputLen; i++) { 1959 mySource.limit(i); 1960 if (i == inputLen) { 1961 result = decoder.decode(mySource, myTarget, true); 1962 } else { 1963 result = decoder.decode(mySource, myTarget, false); 1964 } 1965 if (result.isError()) { 1966 errln("Test small input buffers while decoding failed. " + result.toString()); 1967 break; 1968 } 1969 if (result.isOverflow()) { 1970 if (throwException) { 1971 throw new BufferOverflowException(); 1972 } 1973 errln("Test small input buffers while decoding threw overflow exception"); 1974 break; 1975 } 1976 1977 } 1978 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1979 errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed"); 1980 } 1981 } 1982 { 1983 decoder.reset(); 1984 myTarget.limit(0); 1985 mySource.limit(0); 1986 mySource.position(source.position()); 1987 myTarget.clear(); 1988 while (true) { 1989 CoderResult result = decoder.decode(mySource, myTarget, false); 1990 if (result.isUnderflow()) { 1991 if (mySource.limit() < source.limit()) 1992 mySource.limit(mySource.limit() + 1); 1993 } else if (result.isOverflow()) { 1994 if (myTarget.limit() < target.limit()) 1995 myTarget.limit(myTarget.limit() + 1); 1996 else 1997 break; 1998 } else /*if (result.isError())*/ { 1999 errln("Test small output buffers while decoding " + result.toString()); 2000 } 2001 if (mySource.position() == mySource.limit()) { 2002 result = decoder.decode(mySource, myTarget, true); 2003 if (result.isError()) { 2004 errln("Test small output buffers while decoding " + result.toString()); 2005 } 2006 result = decoder.flush(myTarget); 2007 if (result.isError()) { 2008 errln("Test small output buffers while decoding " + result.toString()); 2009 } 2010 break; 2011 } 2012 } 2013 2014 if (!equals(myTarget, target, targetLimit)) { 2015 errln("Test small output buffers " + encoding + " TO Unicode failed"); 2016 } 2017 } 2018 } 2019 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush)2020 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2021 boolean throwException, boolean flush) throws Exception, BufferOverflowException { 2022 smBufEncode(encoder, encoding, source, target, throwException, flush, true); 2023 } 2024 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray)2025 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2026 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2027 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2028 } 2029 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)2030 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2031 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2032 BufferOverflowException { 2033 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2034 2035 CharBuffer mySource; 2036 ByteBuffer myTarget; 2037 if (backedByArray) { 2038 mySource = CharBuffer.allocate(source.capacity()); 2039 myTarget = ByteBuffer.allocate(target.capacity()); 2040 } else { 2041 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2042 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2043 } 2044 mySource.position(source.position()); 2045 for (int i = source.position(); i < source.limit(); i++) 2046 mySource.put(i, source.get(i)); 2047 2048 myTarget.clear(); 2049 { 2050 logln("Running tests on small input buffers for " + encoding); 2051 encoder.reset(); 2052 myTarget.limit(target.limit()); 2053 mySource.limit(source.limit()); 2054 mySource.position(source.position()); 2055 CoderResult result = null; 2056 2057 result = encoder.encode(mySource, myTarget, true); 2058 if (flush) { 2059 result = encoder.flush(myTarget); 2060 } 2061 2062 if (result.isError()) { 2063 if (throwException) { 2064 throw new Exception(); 2065 } 2066 errln("Test complete while encoding failed. " + result.toString()); 2067 } 2068 if (result.isOverflow()) { 2069 if (throwException) { 2070 throw new BufferOverflowException(); 2071 } 2072 errln("Test complete while encoding threw overflow exception"); 2073 } 2074 if (!equals(myTarget, target, targetLimit)) { 2075 errln("Test complete buffers while encoding for " + encoding + " failed"); 2076 2077 } else { 2078 logln("Tests complete buffers for " + encoding + " passed"); 2079 } 2080 } 2081 if (isQuick()) { 2082 return; 2083 } 2084 { 2085 logln("Running tests on small input buffers for " + encoding); 2086 encoder.reset(); 2087 myTarget.clear(); 2088 myTarget.limit(target.limit()); 2089 mySource.limit(source.limit()); 2090 mySource.position(source.position()); 2091 int inputLen = mySource.limit(); 2092 CoderResult result = null; 2093 for (int i = 1; i <= inputLen; i++) { 2094 mySource.limit(i); 2095 result = encoder.encode(mySource, myTarget, false); 2096 if (result.isError()) { 2097 errln("Test small input buffers while encoding failed. " + result.toString()); 2098 } 2099 if (result.isOverflow()) { 2100 if (throwException) { 2101 throw new BufferOverflowException(); 2102 } 2103 errln("Test small input buffers while encoding threw overflow exception"); 2104 } 2105 } 2106 if (!equals(myTarget, target, targetLimit)) { 2107 errln("Test small input buffers " + encoding + " From Unicode failed"); 2108 } else { 2109 logln("Tests on small input buffers for " + encoding + " passed"); 2110 } 2111 } 2112 { 2113 logln("Running tests on small output buffers for " + encoding); 2114 encoder.reset(); 2115 myTarget.clear(); 2116 myTarget.limit(target.limit()); 2117 mySource.limit(source.limit()); 2118 mySource.position(source.position()); 2119 mySource.position(0); 2120 myTarget.position(0); 2121 2122 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2123 2124 while (true) { 2125 int pos = myTarget.position(); 2126 2127 CoderResult result = encoder.encode(mySource, myTarget, false); 2128 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2129 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2130 2131 if (result.isError()) { 2132 errln("Test small output buffers while encoding " + result.toString()); 2133 } 2134 if (mySource.position() == mySource.limit()) { 2135 result = encoder.encode(mySource, myTarget, true); 2136 if (result.isError()) { 2137 errln("Test small output buffers while encoding " + result.toString()); 2138 } 2139 2140 myTarget.limit(myTarget.capacity()); 2141 result = encoder.flush(myTarget); 2142 if (result.isError()) { 2143 errln("Test small output buffers while encoding " + result.toString()); 2144 } 2145 break; 2146 } 2147 } 2148 if (!equals(myTarget, target, targetLimit)) { 2149 errln("Test small output buffers " + encoding + " From Unicode failed."); 2150 } 2151 logln("Tests on small output buffers for " + encoding + " passed"); 2152 } 2153 } 2154 2155 2156 //TODO 2157 /* 2158 @Test 2159 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2160 try { 2161 { 2162 String source = uSource.toString(); 2163 byte[] target = source.getBytes(m_encoding); 2164 if (!equals(target, bSource.array())) { 2165 errln("encode using string API failed"); 2166 } 2167 } 2168 { 2169 2170 String target = new String(bSource.array(), m_encoding); 2171 if (!equals(uSource, target.toCharArray())) { 2172 errln("decode using string API failed"); 2173 } 2174 } 2175 } catch (Exception e) { 2176 //e.printStackTrace(); 2177 errln(e.getMessage()); 2178 } 2179 } 2180 2181 /*private void fromUnicodeTest() throws Exception { 2182 2183 logln("Loaded Charset: " + charset.getClass().toString()); 2184 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2185 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2186 2187 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2188 logln("Created ByteBuffer of length: " + uSource.length); 2189 CharBuffer mySource = CharBuffer.wrap(uSource); 2190 logln("Wrapped ByteBuffer with CharBuffer "); 2191 encoder.reset(); 2192 logln("Test Unicode to " + encoding ); 2193 encoder.encode(mySource, myTarget, true); 2194 if (!equals(myTarget, gbSource)) { 2195 errln("--Test Unicode to " + encoding + ": FAILED"); 2196 } 2197 logln("Test Unicode to " + encoding +" passed"); 2198 } 2199 2200 @Test 2201 public void TestToUnicode( ) throws Exception { 2202 2203 logln("Loaded Charset: " + charset.getClass().toString()); 2204 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2205 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2206 2207 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2208 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2209 decoder.reset(); 2210 CoderResult result = decoder.decode(mySource, myTarget, true); 2211 if (result.isError()) { 2212 errln("Test ToUnicode -- FAILED"); 2213 } 2214 if (!equals(myTarget, uSource)) { 2215 errln("--Test " + encoding + " to Unicode :FAILED"); 2216 } 2217 } 2218 2219 public static byte[] getByteArray(char[] source) { 2220 byte[] target = new byte[source.length]; 2221 int i = source.length; 2222 for (; --i >= 0;) { 2223 target[i] = (byte) source[i]; 2224 } 2225 return target; 2226 } 2227 /* 2228 private void smBufCharset(Charset charset) { 2229 try { 2230 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2231 CharBuffer uTarget = 2232 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2233 2234 if (!equals(uTarget, uSource)) { 2235 errln("Test " + charset.toString() + " to Unicode :FAILED"); 2236 } 2237 if (!equals(bTarget, gbSource)) { 2238 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2239 } 2240 } catch (Exception ex) { 2241 errln("Encountered exception in smBufCharset"); 2242 } 2243 } 2244 2245 @Test 2246 public void TestMultithreaded() throws Exception { 2247 final Charset cs = Charset.forName(encoding); 2248 if (cs == charset) { 2249 errln("The objects are equal"); 2250 } 2251 smBufCharset(cs); 2252 try { 2253 final Thread t1 = new Thread() { 2254 public void run() { 2255 // commented out since the methods on 2256 // Charset API are supposed to be thread 2257 // safe ... to test it we don't sync 2258 2259 // synchronized(charset){ 2260 while (!interrupted()) { 2261 try { 2262 smBufCharset(cs); 2263 } catch (UnsupportedCharsetException ueEx) { 2264 errln(ueEx.toString()); 2265 } 2266 } 2267 2268 // } 2269 } 2270 }; 2271 final Thread t2 = new Thread() { 2272 public void run() { 2273 // synchronized(charset){ 2274 while (!interrupted()) { 2275 try { 2276 smBufCharset(cs); 2277 } catch (UnsupportedCharsetException ueEx) { 2278 errln(ueEx.toString()); 2279 } 2280 } 2281 2282 //} 2283 } 2284 }; 2285 t1.start(); 2286 t2.start(); 2287 int i = 0; 2288 for (;;) { 2289 if (i > 1000000000) { 2290 try { 2291 t1.interrupt(); 2292 } catch (Exception e) { 2293 } 2294 try { 2295 t2.interrupt(); 2296 } catch (Exception e) { 2297 } 2298 break; 2299 } 2300 i++; 2301 } 2302 } catch (Exception e) { 2303 throw e; 2304 } 2305 } 2306 2307 @Test 2308 public void TestSynchronizedMultithreaded() throws Exception { 2309 // Methods on CharsetDecoder and CharsetEncoder classes 2310 // are inherently unsafe if accessed by multiple concurrent 2311 // thread so we synchronize them 2312 final Charset charset = Charset.forName(encoding); 2313 final CharsetDecoder decoder = charset.newDecoder(); 2314 final CharsetEncoder encoder = charset.newEncoder(); 2315 try { 2316 final Thread t1 = new Thread() { 2317 public void run() { 2318 while (!interrupted()) { 2319 try { 2320 synchronized (encoder) { 2321 smBufEncode(encoder, encoding); 2322 } 2323 synchronized (decoder) { 2324 smBufDecode(decoder, encoding); 2325 } 2326 } catch (UnsupportedCharsetException ueEx) { 2327 errln(ueEx.toString()); 2328 } 2329 } 2330 2331 } 2332 }; 2333 final Thread t2 = new Thread() { 2334 public void run() { 2335 while (!interrupted()) { 2336 try { 2337 synchronized (encoder) { 2338 smBufEncode(encoder, encoding); 2339 } 2340 synchronized (decoder) { 2341 smBufDecode(decoder, encoding); 2342 } 2343 } catch (UnsupportedCharsetException ueEx) { 2344 errln(ueEx.toString()); 2345 } 2346 } 2347 } 2348 }; 2349 t1.start(); 2350 t2.start(); 2351 int i = 0; 2352 for (;;) { 2353 if (i > 1000000000) { 2354 try { 2355 t1.interrupt(); 2356 } catch (Exception e) { 2357 } 2358 try { 2359 t2.interrupt(); 2360 } catch (Exception e) { 2361 } 2362 break; 2363 } 2364 i++; 2365 } 2366 } catch (Exception e) { 2367 throw e; 2368 } 2369 } 2370 */ 2371 2372 @Test TestMBCS()2373 public void TestMBCS(){ 2374 { 2375 // Encoder: from Unicode conversion 2376 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2377 ByteBuffer out = ByteBuffer.allocate(6); 2378 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2379 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2380 if(!result.isError()){ 2381 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2382 if(!equals(expected, out.array())){ 2383 errln("Did not get the expected result for substitution bytes. Got: "+ 2384 hex(out.array())); 2385 } 2386 logln("Output: "+ hex(out.array())); 2387 }else{ 2388 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2389 } 2390 } 2391 { 2392 // Decoder: to Unicode conversion 2393 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2394 CharBuffer out = CharBuffer.allocate(3); 2395 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2396 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2397 if(!result.isError()){ 2398 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2399 if(!equals(expected, out.array())){ 2400 errln("Did not get the expected result for substitution chars. Got: "+ 2401 hex(out.array())); 2402 } 2403 logln("Output: "+ hex(out.array())); 2404 }else{ 2405 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2406 } 2407 } 2408 } 2409 2410 @Test TestJB4897()2411 public void TestJB4897(){ 2412 CharsetProviderICU provider = new CharsetProviderICU(); 2413 Charset charset = provider.charsetForName("x-abracadabra"); 2414 if(charset!=null && charset.canEncode()== true){ 2415 errln("provider.charsetForName() does not validate the charset names" ); 2416 } 2417 } 2418 2419 @Test TestJB5027()2420 public void TestJB5027() { 2421 CharsetProviderICU provider= new CharsetProviderICU(); 2422 2423 Charset fake = provider.charsetForName("doesNotExist"); 2424 if(fake != null){ 2425 errln("\"doesNotExist\" returned " + fake); 2426 } 2427 Charset xfake = provider.charsetForName("x-doesNotExist"); 2428 if(xfake!=null){ 2429 errln("\"x-doesNotExist\" returned " + xfake); 2430 } 2431 } 2432 2433 //test to make sure that number of aliases and canonical names are in the charsets that are in 2434 @Test TestAllNames()2435 public void TestAllNames() { 2436 2437 CharsetProviderICU provider= new CharsetProviderICU(); 2438 Object[] available = CharsetProviderICU.getAvailableNames(); 2439 for(int i=0; i<available.length;i++){ 2440 try{ 2441 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2442 2443 // ',' is not allowed by Java's charset name checker 2444 if(canon.indexOf(',')>=0){ 2445 continue; 2446 } 2447 Charset cs = provider.charsetForName((String)available[i]); 2448 2449 Object[] javaAliases = cs.aliases().toArray(); 2450 //seach for ICU canonical name in javaAliases 2451 boolean inAliasList = false; 2452 for(int j=0; j<javaAliases.length; j++){ 2453 String java = (String) javaAliases[j]; 2454 if(java.equals(canon)){ 2455 logln("javaAlias: " + java + " canon: " + canon); 2456 inAliasList = true; 2457 } 2458 } 2459 if(inAliasList == false){ 2460 errln("Could not find ICU canonical name: "+canon+ " for java canonical name: "+ available[i]+ " "+ i); 2461 } 2462 }catch(UnsupportedCharsetException ex){ 2463 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2464 continue; 2465 } 2466 } 2467 } 2468 2469 @Test TestDecoderImplFlush()2470 public void TestDecoderImplFlush() { 2471 CharsetProviderICU provider = new CharsetProviderICU(); 2472 Charset ics = provider.charsetForName("UTF-16"); 2473 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2474 execDecoder(jcs); 2475 execDecoder(ics); 2476 } 2477 2478 @Test TestEncoderImplFlush()2479 public void TestEncoderImplFlush() { 2480 CharsetProviderICU provider = new CharsetProviderICU(); 2481 Charset ics = provider.charsetForName("UTF-16"); 2482 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2483 execEncoder(jcs); 2484 execEncoder(ics); 2485 } execDecoder(Charset cs)2486 private void execDecoder(Charset cs){ 2487 CharsetDecoder decoder = cs.newDecoder(); 2488 decoder.onMalformedInput(CodingErrorAction.REPORT); 2489 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2490 CharBuffer out = CharBuffer.allocate(10); 2491 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2492 -2, 32, 0, 98 }), out, false); 2493 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2494 2495 logln(cs.getClass().toString()+ ":" +result.toString()); 2496 try { 2497 result = decoder.flush(out); 2498 logln(cs.getClass().toString()+ ":" +result.toString()); 2499 } catch (Exception e) { 2500 errln(e.getMessage()+" "+cs.getClass().toString()); 2501 } 2502 } execEncoder(Charset cs)2503 private void execEncoder(Charset cs){ 2504 CharsetEncoder encoder = cs.newEncoder(); 2505 encoder.onMalformedInput(CodingErrorAction.REPORT); 2506 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2507 ByteBuffer out = ByteBuffer.allocate(10); 2508 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2509 '\u2345', 32, 98 }), out, false); 2510 logln(cs.getClass().toString()+ ":" +result.toString()); 2511 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2512 2513 logln(cs.getClass().toString()+ ":" +result.toString()); 2514 try { 2515 result = encoder.flush(out); 2516 logln(cs.getClass().toString()+ ":" +result.toString()); 2517 } catch (Exception e) { 2518 errln(e.getMessage()+" "+cs.getClass().toString()); 2519 } 2520 } 2521 2522 @Test TestDecodeMalformed()2523 public void TestDecodeMalformed() { 2524 CharsetProviderICU provider = new CharsetProviderICU(); 2525 Charset ics = provider.charsetForName("UTF-16BE"); 2526 //Use SUN's charset 2527 Charset jcs = Charset.forName("UTF-16"); 2528 CoderResult ir = execMalformed(ics); 2529 CoderResult jr = execMalformed(jcs); 2530 if(ir!=jr){ 2531 errln("ICU's decoder did not return the same result as Sun. ICU: "+ir.toString()+" Sun: "+jr.toString()); 2532 } 2533 } 2534 execMalformed(Charset cs)2535 private CoderResult execMalformed(Charset cs){ 2536 CharsetDecoder decoder = cs.newDecoder(); 2537 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2538 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2539 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2540 CharBuffer out = CharBuffer.allocate(3); 2541 return decoder.decode(in, out, true); 2542 } 2543 2544 @Test TestJavaUTF16Decoder()2545 public void TestJavaUTF16Decoder(){ 2546 CharsetProviderICU provider = new CharsetProviderICU(); 2547 Charset ics = provider.charsetForName("UTF-16BE"); 2548 //Use SUN's charset 2549 Charset jcs = Charset.forName("UTF-16"); 2550 Exception ie = execConvertAll(ics); 2551 Exception je = execConvertAll(jcs); 2552 if(ie!=je){ 2553 errln("ICU's decoder did not return the same result as Sun. ICU: "+ie.toString()+" Sun: "+je.toString()); 2554 } 2555 } execConvertAll(Charset cs)2556 private Exception execConvertAll(Charset cs){ 2557 ByteBuffer in = ByteBuffer.allocate(400); 2558 int i=0; 2559 while(in.position()!=in.capacity()){ 2560 in.put((byte)0xD8); 2561 in.put((byte)i); 2562 in.put((byte)0xDC); 2563 in.put((byte)i); 2564 i++; 2565 } 2566 in.limit(in.position()); 2567 in.position(0); 2568 CharsetDecoder decoder = cs.newDecoder(); 2569 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2570 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2571 try{ 2572 CharBuffer out = decoder.decode(in); 2573 if(out!=null){ 2574 logln(cs.toString()+" encoing succeeded as expected!"); 2575 } 2576 }catch ( Exception ex){ 2577 errln("Did not get expected exception for encoding: "+cs.toString()); 2578 return ex; 2579 } 2580 return null; 2581 } 2582 2583 @Test TestUTF32BOM()2584 public void TestUTF32BOM(){ 2585 2586 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2587 char[] in = new char[] { 0xd800, 0xdc00, 2588 0xd801, 0xdc01, 2589 0xdbff, 0xdfff, 2590 0xd900, 0xdd00, 2591 0x0000, 0x0041, 2592 0x0000, 0x0042, 2593 0x0000, 0x0043}; 2594 2595 CharBuffer inBuf = CharBuffer.allocate(in.length); 2596 inBuf.put(in); 2597 CharsetEncoder encoder = cs.newEncoder(); 2598 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2599 inBuf.rewind(); 2600 encoder.encode(inBuf, outBuf, true); 2601 outBuf.rewind(); 2602 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2603 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2604 errln("The UTF32 encoder did not appended bom. Length returned: " + outBuf.remaining()); 2605 } 2606 while(outBuf.hasRemaining()){ 2607 logln("0x"+hex(outBuf.get())); 2608 } 2609 CharsetDecoder decoder = cs.newDecoder(); 2610 outBuf.limit(outBuf.position()); 2611 outBuf.rewind(); 2612 CharBuffer rt = CharBuffer.allocate(in.length); 2613 CoderResult cr = decoder.decode(outBuf, rt, true); 2614 if(cr.isError()){ 2615 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2616 } 2617 equals(rt, in); 2618 try{ 2619 rt.clear(); 2620 outBuf.rewind(); 2621 Charset utf16 = Charset.forName("UTF-32"); 2622 CharsetDecoder dc = utf16.newDecoder(); 2623 cr = dc.decode(outBuf, rt, true); 2624 equals(rt, in); 2625 }catch(UnsupportedCharsetException ex){ 2626 // swallow the expection. 2627 } 2628 } 2629 2630 /* 2631 * Michael Ow 2632 * Modified 070424 2633 */ 2634 /*The following two methods provides the option of exceptions when Decoding 2635 * and Encoding if needed for testing purposes. 2636 */ smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target)2637 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2638 smBufDecode(decoder, encoding, source, target, true); 2639 } smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray)2640 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2641 try { 2642 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2643 } 2644 catch (Exception ex) { 2645 System.out.println("!exception!"); 2646 } 2647 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target)2648 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2649 smBufEncode(encoder, encoding, source, target, true); 2650 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray)2651 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2652 try { 2653 smBufEncode(encoder, encoding, source, target, false, false); 2654 } 2655 catch (Exception ex) { 2656 System.out.println("!exception!"); 2657 } 2658 } 2659 2660 //Test CharsetICUProvider 2661 @Test TestNullCanonicalName()2662 public void TestNullCanonicalName() { 2663 String enc = null; 2664 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2665 2666 if (canonicalName != null) { 2667 errln("getICUCanonicalName return a non-null string for given null string"); 2668 } 2669 } 2670 2671 @Test TestGetAllNames()2672 public void TestGetAllNames() { 2673 String[] names = null; 2674 2675 names = CharsetProviderICU.getAllNames(); 2676 2677 if (names == null) { 2678 errln("getAllNames returned a null string."); 2679 } 2680 } 2681 2682 //Test CharsetICU 2683 @Test TestCharsetContains()2684 public void TestCharsetContains() { 2685 boolean test; 2686 2687 CharsetProvider provider = new CharsetProviderICU(); 2688 Charset cs1 = provider.charsetForName("UTF-32"); 2689 Charset cs2 = null; 2690 2691 test = cs1.contains(cs2); 2692 2693 if (test != false) { 2694 errln("Charset.contains returned true for a null charset."); 2695 } 2696 2697 cs2 = CharsetICU.forNameICU("UTF-32"); 2698 2699 test = cs1.contains(cs2); 2700 2701 if (test != true) { 2702 errln("Charset.contains returned false for an identical charset."); 2703 } 2704 2705 cs2 = provider.charsetForName("UTF-8"); 2706 2707 test = cs1.contains(cs2); 2708 2709 if (test != false) { 2710 errln("Charset.contains returned true for a different charset."); 2711 } 2712 } 2713 2714 @Test TestCharsetICUNullCharsetName()2715 public void TestCharsetICUNullCharsetName() { 2716 String charsetName = null; 2717 2718 try { 2719 CharsetICU.forNameICU(charsetName); 2720 errln("CharsetICU.forName should have thown an exception after getting a null charsetName."); 2721 } 2722 catch(Exception ex) { 2723 } 2724 } 2725 2726 //Test CharsetASCII 2727 @Test TestCharsetASCIIOverFlow()2728 public void TestCharsetASCIIOverFlow() { 2729 int byteBufferLimit; 2730 int charBufferLimit; 2731 2732 CharsetProvider provider = new CharsetProviderICU(); 2733 Charset cs = provider.charsetForName("ASCII"); 2734 CharsetEncoder encoder = cs.newEncoder(); 2735 CharsetDecoder decoder = cs.newDecoder(); 2736 2737 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2738 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2739 2740 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2741 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2742 2743 for(int j=0;j<=0x7f; j++){ 2744 charBuffer.put((char)j); 2745 byteBuffer.put((byte)j); 2746 } 2747 2748 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2749 byteBuffer.position(0); 2750 charBuffer.limit(charBufferLimit = charBuffer.position()); 2751 charBuffer.position(0); 2752 2753 //test for overflow 2754 byteBufferTest.limit(byteBufferLimit - 5); 2755 byteBufferTest.position(0); 2756 charBufferTest.limit(charBufferLimit - 5); 2757 charBufferTest.position(0); 2758 try { 2759 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2760 errln("Overflow exception while decoding ASCII should have been thrown."); 2761 } 2762 catch(Exception ex) { 2763 } 2764 try { 2765 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2766 errln("Overflow exception while encoding ASCII should have been thrown."); 2767 } 2768 catch (Exception ex) { 2769 } 2770 2771 // For better code coverage 2772 /* For better code coverage */ 2773 byte byteout[] = { 2774 (byte)0x01 2775 }; 2776 char charin[] = { 2777 (char)0x0001, (char)0x0002 2778 }; 2779 ByteBuffer bb = ByteBuffer.wrap(byteout); 2780 CharBuffer cb = CharBuffer.wrap(charin); 2781 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2782 // which makes code compiled for a newer JDK not run on an older one. 2783 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2784 encoder.reset(); 2785 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2786 errln("Overflow error while encoding ASCII should have occurred."); 2787 } 2788 } 2789 2790 //Test CharsetUTF7 2791 @Test TestCharsetUTF7()2792 public void TestCharsetUTF7() { 2793 CoderResult result = CoderResult.UNDERFLOW; 2794 CharsetProvider provider = new CharsetProviderICU(); 2795 Charset cs = provider.charsetForName("UTF-7"); 2796 CharsetEncoder encoder = cs.newEncoder(); 2797 CharsetDecoder decoder = cs.newDecoder(); 2798 2799 CharBuffer us = CharBuffer.allocate(0x100); 2800 ByteBuffer bs = ByteBuffer.allocate(0x100); 2801 2802 /* Unicode : A<not equal to Alpha Lamda>. */ 2803 /* UTF7: AImIDkQ. */ 2804 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2805 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2806 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2807 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2808 2809 bs.limit(bs.position()); 2810 bs.position(0); 2811 us.limit(us.position()); 2812 us.position(0); 2813 2814 smBufDecode(decoder, "UTF-7", bs, us); 2815 smBufEncode(encoder, "UTF-7", us, bs); 2816 2817 /* Testing UTF-7 toUnicode with substitute callbacks */ 2818 { 2819 byte [] bytesTestErrorConsumption = { 2820 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 2821 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2822 2823 }; 2824 char [] unicodeTestErrorConsumption = { 2825 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2826 }; 2827 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2828 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2829 2830 CodingErrorAction savedMal = decoder.malformedInputAction(); 2831 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2832 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2833 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2834 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2835 decoder.onMalformedInput(savedMal); 2836 decoder.onUnmappableCharacter(savedUMap); 2837 } 2838 /* ticket 6151 */ 2839 CharBuffer smallus = CharBuffer.allocate(1); 2840 ByteBuffer bigbs = ByteBuffer.allocate(3); 2841 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2842 bigbs.position(0); 2843 try { 2844 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2845 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2846 } catch (Exception ex) { 2847 } 2848 2849 //The rest of the code in this method is to provide better code coverage 2850 CharBuffer ccus = CharBuffer.allocate(0x10); 2851 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2852 2853 //start of charset decoder code coverage code 2854 //test for accurate illegal and control character checking 2855 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2856 ccus.put((char)0x0000); 2857 2858 ccbs.limit(ccbs.position()); 2859 ccbs.position(0); 2860 ccus.limit(ccus.position()); 2861 ccus.position(0); 2862 2863 try { 2864 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2865 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2866 } 2867 catch (Exception ex) { 2868 } 2869 2870 ccbs.clear(); 2871 ccus.clear(); 2872 2873 //test for illegal base64 character 2874 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2875 ccus.put((char)0x0000); 2876 2877 ccbs.limit(ccbs.position()); 2878 ccbs.position(0); 2879 ccus.limit(ccus.position()); 2880 ccus.position(0); 2881 2882 try { 2883 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2884 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2885 } 2886 catch (Exception ex) { 2887 } 2888 2889 ccbs.clear(); 2890 ccus.clear(); 2891 2892 //test for illegal order of the base64 character sequence 2893 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2894 ccus.put((char)0x0000); ccus.put((char)0x0000); 2895 2896 ccbs.limit(ccbs.position()); 2897 ccbs.position(0); 2898 ccus.limit(ccus.position()); 2899 ccus.position(0); 2900 2901 try { 2902 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2903 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2904 } 2905 catch (Exception ex) { 2906 } 2907 2908 ccbs.clear(); 2909 ccus.clear(); 2910 2911 //test for illegal order of the base64 character sequence 2912 ccbs.put((byte)0x2b); ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2913 ccus.put((char)0x0000); 2914 2915 ccbs.limit(ccbs.position()); 2916 ccbs.position(0); 2917 ccus.limit(ccus.position()); 2918 ccus.position(0); 2919 2920 try { 2921 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2922 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2923 } 2924 catch (Exception ex) { 2925 } 2926 2927 ccbs.clear(); 2928 ccus.clear(); 2929 2930 //test for illegal order of the base64 character sequence 2931 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2932 ccus.put((char)0x0000); 2933 2934 ccbs.limit(ccbs.position()); 2935 ccbs.position(0); 2936 ccus.limit(ccus.position()); 2937 ccus.position(0); 2938 2939 try { 2940 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2941 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2942 } 2943 catch (Exception ex) { 2944 } 2945 2946 ccbs.clear(); 2947 ccus.clear(); 2948 2949 //test for illegal order of the base64 character sequence 2950 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 2951 ccus.put((char)0x0000); 2952 2953 ccbs.limit(ccbs.position()); 2954 ccbs.position(0); 2955 ccus.limit(ccus.position()); 2956 ccus.position(0); 2957 2958 try { 2959 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 2960 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2961 } 2962 catch (Exception ex) { 2963 } 2964 2965 ccbs.clear(); 2966 ccus.clear(); 2967 2968 //test for overflow buffer error 2969 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 2970 2971 ccbs.limit(ccbs.position()); 2972 ccbs.position(0); 2973 ccus.limit(0); 2974 ccus.position(0); 2975 2976 try { 2977 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 2978 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2979 } 2980 catch (Exception ex) { 2981 } 2982 2983 ccbs.clear(); 2984 ccus.clear(); 2985 2986 //test for overflow buffer error 2987 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 2988 2989 ccbs.limit(ccbs.position()); 2990 ccbs.position(0); 2991 ccus.limit(0); 2992 ccus.position(0); 2993 2994 try { 2995 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 2996 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2997 } 2998 catch (Exception ex) { 2999 } 3000 //end of charset decoder code coverage code 3001 3002 //start of charset encoder code coverage code 3003 ccbs.clear(); 3004 ccus.clear(); 3005 //test for overflow buffer error 3006 ccus.put((char)0x002b); 3007 ccbs.put((byte)0x2b); 3008 3009 ccbs.limit(ccbs.position()); 3010 ccbs.position(0); 3011 ccus.limit(ccus.position()); 3012 ccus.position(0); 3013 3014 try { 3015 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3016 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3017 } 3018 catch (Exception ex) { 3019 } 3020 3021 ccbs.clear(); 3022 ccus.clear(); 3023 3024 //test for overflow buffer error 3025 ccus.put((char)0x002b); ccus.put((char)0x2262); 3026 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3027 3028 ccbs.limit(ccbs.position()); 3029 ccbs.position(0); 3030 ccus.limit(ccus.position()); 3031 ccus.position(0); 3032 3033 try { 3034 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3035 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3036 } 3037 catch (Exception ex) { 3038 } 3039 3040 ccbs.clear(); 3041 ccus.clear(); 3042 3043 //test for overflow buffer error 3044 ccus.put((char)0x2262); ccus.put((char)0x0049); 3045 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3046 ccbs.limit(ccbs.position()); 3047 ccbs.position(0); 3048 ccus.limit(ccus.position()); 3049 ccus.position(0); 3050 3051 try { 3052 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3053 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3054 } 3055 catch (Exception ex) { 3056 } 3057 3058 ccbs.clear(); 3059 ccus.clear(); 3060 3061 //test for overflow buffer error 3062 ccus.put((char)0x2262); ccus.put((char)0x0395); 3063 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3064 ccbs.limit(ccbs.position()); 3065 ccbs.position(0); 3066 ccus.limit(ccus.position()); 3067 ccus.position(0); 3068 3069 try { 3070 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3071 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3072 } 3073 catch (Exception ex) { 3074 } 3075 3076 ccbs.clear(); 3077 ccus.clear(); 3078 3079 //test for overflow buffer error 3080 ccus.put((char)0x2262); ccus.put((char)0x0395); 3081 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3082 ccbs.limit(ccbs.position()); 3083 ccbs.position(0); 3084 ccus.limit(ccus.position()); 3085 ccus.position(0); 3086 3087 try { 3088 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3089 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3090 } 3091 catch (Exception ex) { 3092 } 3093 3094 ccbs.clear(); 3095 ccus.clear(); 3096 3097 //test for overflow buffer error 3098 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3099 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3100 ccbs.limit(ccbs.position()); 3101 ccbs.position(0); 3102 ccus.limit(ccus.position()); 3103 ccus.position(0); 3104 3105 try { 3106 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3107 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3108 } 3109 catch (Exception ex) { 3110 } 3111 3112 ccbs.clear(); 3113 ccus.clear(); 3114 3115 //test for overflow buffer error 3116 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3117 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3118 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3119 ccbs.limit(ccbs.position()); 3120 ccbs.position(0); 3121 ccus.limit(ccus.position()); 3122 ccus.position(0); 3123 3124 try { 3125 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3126 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3127 } 3128 catch (Exception ex) { 3129 } 3130 3131 ccbs.clear(); 3132 ccus.clear(); 3133 3134 //test for overflow buffer error 3135 ccus.put((char)0x0049); ccus.put((char)0x0048); 3136 ccbs.put((byte)0x00); 3137 ccbs.limit(ccbs.position()); 3138 ccbs.position(0); 3139 ccus.limit(ccus.position()); 3140 ccus.position(0); 3141 3142 try { 3143 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3144 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3145 } 3146 catch (Exception ex) { 3147 } 3148 3149 ccbs.clear(); 3150 ccus.clear(); 3151 3152 //test for overflow buffer error 3153 ccus.put((char)0x2262); 3154 ccbs.put((byte)0x00); 3155 ccbs.limit(ccbs.position()); 3156 ccbs.position(0); 3157 ccus.limit(ccus.position()); 3158 ccus.position(0); 3159 3160 try { 3161 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3162 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3163 } 3164 catch (Exception ex) { 3165 } 3166 3167 ccbs.clear(); 3168 ccus.clear(); 3169 3170 //test for overflow buffer error 3171 ccus.put((char)0x2262); ccus.put((char)0x0049); 3172 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3173 ccbs.limit(ccbs.position()); 3174 ccbs.position(0); 3175 ccus.limit(ccus.position()); 3176 ccus.position(0); 3177 3178 try { 3179 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, ccbs, true, false); 3180 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3181 } 3182 catch (Exception ex) { 3183 } 3184 3185 ccbs.clear(); 3186 ccus.clear(); 3187 3188 //test for overflow buffer error 3189 ccus.put((char)0x2262); 3190 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3191 3192 ccbs.limit(ccbs.position()); 3193 ccbs.position(0); 3194 ccus.limit(ccus.position()); 3195 ccus.position(0); 3196 try { 3197 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3198 } catch (Exception ex) { 3199 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 3200 } 3201 3202 ccbs.clear(); 3203 ccus.clear(); 3204 3205 //test for overflow buffer error 3206 encoder.reset(); 3207 ccus.put((char)0x3980); ccus.put((char)0x2715); 3208 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3209 3210 ccbs.limit(ccbs.position()); 3211 ccbs.position(0); 3212 ccus.limit(ccus.position()); 3213 ccus.position(0); 3214 3215 result = encoder.encode(ccus, ccbs, true); 3216 result = encoder.flush(ccbs); 3217 if (!result.isOverflow()) { 3218 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3219 } 3220 //end of charset encoder code coverage code 3221 } 3222 3223 @Test TestBug12956()3224 public void TestBug12956() { 3225 final CharsetProvider provider = new CharsetProviderICU(); 3226 final Charset cs_utf7 = provider.charsetForName("UTF-7"); 3227 final Charset cs_imap = provider.charsetForName("IMAP-mailbox-name"); 3228 final String test = "新"; 3229 final byte[] expected_utf7 = {0x2b, 0x5a, 0x62, 0x41, 0x2d}; 3230 final byte[] expected_imap = {0x26, 0x5a, 0x62, 0x41, 0x2d}; 3231 3232 byte[] bytes = test.getBytes(cs_utf7); 3233 if (!Arrays.equals(bytes, expected_utf7)) { 3234 errln("Incorrect UTF-7 conversion. Got " + new String(bytes) + " but expect " + 3235 new String(expected_utf7)); 3236 } 3237 3238 bytes = test.getBytes(cs_imap); 3239 if (!Arrays.equals(bytes, expected_imap)) { 3240 errln("Incorrect IMAP-mailbox-name conversion. Got " + new String(bytes) + 3241 " but expect " + new String(expected_imap)); 3242 } 3243 } 3244 3245 //Test Charset ISCII 3246 @Test TestCharsetISCII()3247 public void TestCharsetISCII() { 3248 CharsetProvider provider = new CharsetProviderICU(); 3249 Charset cs = provider.charsetForName("ISCII,version=0"); 3250 CharsetEncoder encoder = cs.newEncoder(); 3251 CharsetDecoder decoder = cs.newDecoder(); 3252 3253 CharBuffer us = CharBuffer.allocate(0x100); 3254 ByteBuffer bs = ByteBuffer.allocate(0x100); 3255 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3256 3257 //test full range of Devanagari 3258 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3259 us.put((char)0x0908); us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3260 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3261 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3262 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3263 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3264 us.put((char)0x0927); us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3265 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3266 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3267 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3268 us.put((char)0x0941); us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3269 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3270 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3271 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3272 3273 bs.put((byte)0xEF); bs.put((byte)0x42); 3274 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3275 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); bs.put((byte)0xAB); bs.put((byte)0xAC); 3276 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3277 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3278 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3279 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3280 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3281 bs.put((byte)0xCB); bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3282 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3283 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3284 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3285 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3286 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3287 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3288 3289 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3290 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3291 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3292 bsr.put((byte)0xB3); bsr.put((byte)0xB4); bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3293 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3294 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3295 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3296 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3297 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3298 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3299 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3300 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3301 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); bsr.put((byte)0xF4); 3302 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3303 3304 //test Soft Halant 3305 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3306 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3307 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3308 3309 //test explicit halant 3310 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3311 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3312 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3313 3314 //test double danda 3315 us.put((char)0x0965); 3316 bs.put((byte)0xEA); bs.put((byte)0xEA); 3317 bsr.put((byte)0xEA); bsr.put((byte)0xEA); 3318 3319 //test ASCII 3320 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3321 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3322 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3323 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3324 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3325 3326 bs.put((byte)0x1B); bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3327 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3328 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3329 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3330 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3331 3332 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3333 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3334 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3335 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3336 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3337 3338 //test from Lotus 3339 //Some of the Lotus ISCII code points have been changed or commented out. 3340 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3341 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3342 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3343 us.put((char)0x095F); 3344 bs.put((byte)0x61); bs.put((byte)0xB3); 3345 bs.put((byte)0x0D); bs.put((byte)0x0A); 3346 bs.put((byte)0xEF); bs.put((byte)0x42); 3347 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3348 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3349 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3350 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3351 bs.put((byte)0xCE); 3352 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3353 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3354 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3355 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3356 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3357 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3358 bsr.put((byte)0xCE); 3359 //end of test from Lotus 3360 3361 //tamil range 3362 us.put((char)0x0B86); us.put((char)0x0B87); us.put((char)0x0B88); 3363 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3364 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3365 3366 //telugu range 3367 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3368 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3369 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3370 3371 //kannada range 3372 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3373 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3374 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3375 3376 //test Abbr sign and Anudatta 3377 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3378 us.put((char)0x0961); us.put((char)0x0963); us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3379 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3380 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3381 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3382 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3383 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3384 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3385 bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3386 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3387 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3388 //bs.put((byte)0xEF); bs.put((byte)0x30); 3389 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3390 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3391 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3392 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3393 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3394 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); bsr.put((byte)0xE9); 3395 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3396 3397 bs.limit(bs.position()); 3398 bs.position(0); 3399 us.limit(us.position()); 3400 us.position(0); 3401 bsr.limit(bsr.position()); 3402 bsr.position(0); 3403 3404 //round trip test 3405 try { 3406 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3407 smBufEncode(encoder, "ISCII-part2", us, bs); 3408 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3409 } catch (Exception ex) { 3410 errln("ISCII round trip test failed."); 3411 } 3412 3413 //Test new characters in the ISCII charset 3414 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3415 decoder = provider.charsetForName("ISCII,version=0").newDecoder(); 3416 char u_pts[] = { 3417 /* DEV */ (char)0x0904, 3418 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3419 }; 3420 byte b_pts[] = { 3421 (byte)0xef, (byte)0x42, 3422 /* DEV */ (byte)0xa4, (byte)0xe0, 3423 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3424 }; 3425 us = CharBuffer.allocate(u_pts.length); 3426 bs = ByteBuffer.allocate(b_pts.length); 3427 us.put(u_pts); 3428 bs.put(b_pts); 3429 3430 bs.limit(bs.position()); 3431 bs.position(0); 3432 us.limit(us.position()); 3433 us.position(0); 3434 3435 try { 3436 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3437 bs.position(0); 3438 us.position(0); 3439 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3440 } catch (Exception ex) { 3441 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3442 } 3443 3444 //The rest of the code in this method is to provide better code coverage 3445 CharBuffer ccus = CharBuffer.allocate(0x10); 3446 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3447 3448 //start of charset decoder code coverage code 3449 //test overflow buffer 3450 ccbs.put((byte)0x49); 3451 3452 ccbs.limit(ccbs.position()); 3453 ccbs.position(0); 3454 ccus.limit(0); 3455 ccus.position(0); 3456 3457 try { 3458 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3459 errln("Exception while decoding ISCII should have been thrown."); 3460 } 3461 catch (Exception ex) { 3462 } 3463 3464 ccbs.clear(); 3465 ccus.clear(); 3466 3467 //test atr overflow buffer 3468 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3469 ccus.put((char)0x00); 3470 3471 ccbs.limit(ccbs.position()); 3472 ccbs.position(0); 3473 ccus.limit(ccus.position()); 3474 ccus.position(0); 3475 3476 try { 3477 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3478 errln("Exception while decoding ISCII should have been thrown."); 3479 } 3480 catch (Exception ex) { 3481 } 3482 3483 //end of charset decoder code coverage code 3484 3485 ccbs.clear(); 3486 ccus.clear(); 3487 3488 //start of charset encoder code coverage code 3489 //test ascii overflow buffer 3490 ccus.put((char)0x41); 3491 3492 ccus.limit(ccus.position()); 3493 ccus.position(0); 3494 ccbs.limit(0); 3495 ccbs.position(0); 3496 3497 try { 3498 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3499 errln("Exception while encoding ISCII should have been thrown."); 3500 } 3501 catch (Exception ex) { 3502 } 3503 3504 ccbs.clear(); 3505 ccus.clear(); 3506 3507 //test ascii overflow buffer 3508 ccus.put((char)0x0A); ccus.put((char)0x0043); 3509 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3510 3511 ccus.limit(ccus.position()); 3512 ccus.position(0); 3513 ccbs.limit(ccbs.position()); 3514 ccbs.position(0); 3515 3516 try { 3517 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3518 errln("Exception while encoding ISCII should have been thrown."); 3519 } 3520 catch (Exception ex) { 3521 } 3522 3523 ccbs.clear(); 3524 ccus.clear(); 3525 3526 //test surrogate malform 3527 ccus.put((char)0x06E3); 3528 ccbs.put((byte)0x00); 3529 3530 ccus.limit(ccus.position()); 3531 ccus.position(0); 3532 ccbs.limit(ccbs.position()); 3533 ccbs.position(0); 3534 3535 try { 3536 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3537 errln("Exception while encoding ISCII should have been thrown."); 3538 } 3539 catch (Exception ex) { 3540 } 3541 3542 ccbs.clear(); 3543 ccus.clear(); 3544 3545 //test surrogate malform 3546 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3547 ccbs.put((byte)0x00); 3548 3549 ccus.limit(ccus.position()); 3550 ccus.position(0); 3551 ccbs.limit(ccbs.position()); 3552 ccbs.position(0); 3553 3554 try { 3555 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3556 errln("Exception while encoding ISCII should have been thrown."); 3557 } 3558 catch (Exception ex) { 3559 } 3560 3561 ccbs.clear(); 3562 ccus.clear(); 3563 3564 //test trail surrogate malform 3565 ccus.put((char)0xDD01); 3566 ccbs.put((byte)0x00); 3567 3568 ccus.limit(ccus.position()); 3569 ccus.position(0); 3570 ccbs.limit(ccbs.position()); 3571 ccbs.position(0); 3572 3573 try { 3574 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3575 errln("Exception while encoding ISCII should have been thrown."); 3576 } 3577 catch (Exception ex) { 3578 } 3579 3580 ccbs.clear(); 3581 ccus.clear(); 3582 3583 //test lead surrogates malform 3584 ccus.put((char)0xD801); ccus.put((char)0xD802); 3585 ccbs.put((byte)0x00); 3586 3587 ccus.limit(ccus.position()); 3588 ccus.position(0); 3589 ccbs.limit(ccbs.position()); 3590 ccbs.position(0); 3591 3592 try { 3593 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3594 errln("Exception while encoding ISCII should have been thrown."); 3595 } 3596 catch (Exception ex) { 3597 } 3598 3599 ccus.clear(); 3600 ccbs.clear(); 3601 3602 //test overflow buffer 3603 ccus.put((char)0x0901); 3604 ccbs.put((byte)0x00); 3605 3606 ccus.limit(ccus.position()); 3607 ccus.position(0); 3608 ccbs.limit(ccbs.position()); 3609 ccbs.position(0); 3610 3611 cs = provider.charsetForName("ISCII,version=0"); 3612 encoder = cs.newEncoder(); 3613 3614 try { 3615 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3616 errln("Exception while encoding ISCII should have been thrown."); 3617 } 3618 catch (Exception ex) { 3619 } 3620 //end of charset encoder code coverage code 3621 } 3622 3623 //Test for the IMAP Charset 3624 @Test TestCharsetIMAP()3625 public void TestCharsetIMAP() { 3626 CharsetProvider provider = new CharsetProviderICU(); 3627 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3628 CharsetEncoder encoder = cs.newEncoder(); 3629 CharsetDecoder decoder = cs.newDecoder(); 3630 3631 CharBuffer us = CharBuffer.allocate(0x20); 3632 ByteBuffer bs = ByteBuffer.allocate(0x20); 3633 3634 us.put((char)0x00A3); us.put((char)0x2020); us.put((char)0x41); 3635 3636 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); bs.put((byte)0x67); bs.put((byte)0x49); 3637 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3638 3639 3640 bs.limit(bs.position()); 3641 bs.position(0); 3642 us.limit(us.position()); 3643 us.position(0); 3644 3645 smBufDecode(decoder, "IMAP", bs, us); 3646 smBufEncode(encoder, "IMAP", us, bs); 3647 3648 //the rest of the code in this method is for better code coverage 3649 us.clear(); 3650 bs.clear(); 3651 3652 //start of charset encoder code coverage 3653 //test buffer overflow 3654 us.put((char)0x0026); us.put((char)0x17A9); 3655 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3656 3657 bs.limit(bs.position()); 3658 bs.position(0); 3659 us.limit(us.position()); 3660 us.position(0); 3661 3662 try { 3663 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3664 errln("Exception while encoding IMAP (1) should have been thrown."); 3665 } catch(Exception ex) { 3666 } 3667 3668 us.clear(); 3669 bs.clear(); 3670 3671 //test buffer overflow 3672 us.put((char)0x17A9); us.put((char)0x0941); 3673 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3674 3675 bs.limit(bs.position()); 3676 bs.position(0); 3677 us.limit(us.position()); 3678 us.position(0); 3679 3680 try { 3681 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3682 errln("Exception while encoding IMAP (2) should have been thrown."); 3683 } catch(Exception ex) { 3684 } 3685 3686 us.clear(); 3687 bs.clear(); 3688 3689 //test buffer overflow 3690 us.put((char)0x17A9); us.put((char)0x0941); 3691 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3692 3693 bs.limit(bs.position()); 3694 bs.position(0); 3695 us.limit(us.position()); 3696 us.position(0); 3697 3698 try { 3699 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3700 errln("Exception while encoding IMAP (3) should have been thrown."); 3701 } catch(Exception ex) { 3702 } 3703 3704 us.clear(); 3705 bs.clear(); 3706 3707 //test buffer overflow 3708 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3709 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3710 bs.put((byte)0x00); 3711 3712 bs.limit(bs.position()); 3713 bs.position(0); 3714 us.limit(us.position()); 3715 us.position(0); 3716 3717 try { 3718 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3719 errln("Exception while encoding IMAP (4) should have been thrown."); 3720 } catch(Exception ex) { 3721 } 3722 3723 us.clear(); 3724 bs.clear(); 3725 3726 //test buffer overflow 3727 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3728 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3729 bs.put((byte)0x00); bs.put((byte)0x00); 3730 3731 bs.limit(bs.position()); 3732 bs.position(0); 3733 us.limit(us.position()); 3734 us.position(0); 3735 3736 try { 3737 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3738 errln("Exception while encoding IMAP (5) should have been thrown."); 3739 } catch(Exception ex) { 3740 } 3741 3742 us.clear(); 3743 bs.clear(); 3744 3745 //test buffer overflow 3746 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3747 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3748 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3749 3750 bs.limit(bs.position()); 3751 bs.position(0); 3752 us.limit(us.position()); 3753 us.position(0); 3754 3755 try { 3756 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3757 errln("Exception while encoding IMAP (6) should have been thrown."); 3758 } catch(Exception ex) { 3759 } 3760 3761 us.clear(); 3762 bs.clear(); 3763 3764 //test buffer overflow 3765 us.put((char)0x17A9); us.put((char)0x0941); 3766 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3767 bs.put((byte)0x00); 3768 3769 bs.limit(bs.position()); 3770 bs.position(0); 3771 us.limit(us.position()); 3772 us.position(0); 3773 3774 try { 3775 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3776 errln("Exception while encoding IMAP (7) should have been thrown."); 3777 } catch(Exception ex) { 3778 } 3779 3780 us.clear(); 3781 bs.clear(); 3782 3783 //test flushing 3784 us.put((char)0x17A9); us.put((char)0x0941); 3785 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3786 bs.put((byte)0x51); bs.put((byte)0x2d); 3787 3788 bs.limit(bs.position()); 3789 bs.position(0); 3790 us.limit(us.position()); 3791 us.position(0); 3792 3793 try { 3794 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3795 } catch(Exception ex) { 3796 errln("Exception while encoding IMAP (8) should not have been thrown."); 3797 } 3798 3799 us = CharBuffer.allocate(0x08); 3800 bs = ByteBuffer.allocate(0x08); 3801 3802 //test flushing buffer overflow 3803 us.put((char)0x0061); 3804 bs.put((byte)0x61); bs.put((byte)0x00); 3805 3806 bs.limit(bs.position()); 3807 bs.position(0); 3808 us.limit(us.position()); 3809 us.position(0); 3810 3811 try { 3812 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3813 } catch(Exception ex) { 3814 errln("Exception while encoding IMAP (9) should not have been thrown."); 3815 } 3816 //end of charset encoder code coverage 3817 3818 us = CharBuffer.allocate(0x10); 3819 bs = ByteBuffer.allocate(0x10); 3820 3821 //start of charset decoder code coverage 3822 //test malform case 2 3823 us.put((char)0x0000); us.put((char)0x0000); 3824 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3825 3826 bs.limit(bs.position()); 3827 bs.position(0); 3828 us.limit(us.position()); 3829 us.position(0); 3830 3831 try { 3832 smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3833 errln("Exception while decoding IMAP (1) should have been thrown."); 3834 } catch(Exception ex) { 3835 } 3836 3837 us.clear(); 3838 bs.clear(); 3839 3840 //test malform case 5 3841 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3842 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3843 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3844 3845 bs.limit(bs.position()); 3846 bs.position(0); 3847 us.limit(us.position()); 3848 us.position(0); 3849 3850 try { 3851 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3852 errln("Exception while decoding IMAP (2) should have been thrown."); 3853 } catch(Exception ex) { 3854 } 3855 3856 us.clear(); 3857 bs.clear(); 3858 3859 //test malform case 7 3860 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3861 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3862 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3863 bs.put((byte)0x41); 3864 3865 bs.limit(bs.position()); 3866 bs.position(0); 3867 us.limit(us.position()); 3868 us.position(0); 3869 3870 try { 3871 smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3872 errln("Exception while decoding IMAP (3) should have been thrown."); 3873 } catch(Exception ex) { 3874 } 3875 //end of charset decoder coder coverage 3876 } 3877 3878 //Test for charset UTF32LE to provide better code coverage 3879 @Test TestCharsetUTF32LE()3880 public void TestCharsetUTF32LE() { 3881 CoderResult result = CoderResult.UNDERFLOW; 3882 CharsetProvider provider = new CharsetProviderICU(); 3883 Charset cs = provider.charsetForName("UTF-32LE"); 3884 CharsetEncoder encoder = cs.newEncoder(); 3885 //CharsetDecoder decoder = cs.newDecoder(); 3886 3887 CharBuffer us = CharBuffer.allocate(0x10); 3888 ByteBuffer bs = ByteBuffer.allocate(0x10); 3889 3890 3891 //test malform surrogate 3892 us.put((char)0xD901); 3893 bs.put((byte)0x00); 3894 3895 bs.limit(bs.position()); 3896 bs.position(0); 3897 us.limit(us.position()); 3898 us.position(0); 3899 3900 try { 3901 smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false); 3902 errln("Exception while encoding UTF32LE (1) should have been thrown."); 3903 } catch (Exception ex) { 3904 } 3905 3906 bs.clear(); 3907 us.clear(); 3908 3909 //test malform surrogate 3910 us.put((char)0xD901); us.put((char)0xD902); 3911 bs.put((byte)0x00); 3912 3913 bs.limit(bs.position()); 3914 bs.position(0); 3915 us.limit(us.position()); 3916 us.position(0); 3917 3918 result = encoder.encode(us, bs, true); 3919 3920 if (!result.isError() && !result.isOverflow()) { 3921 errln("Error while encoding UTF32LE (2) should have occurred."); 3922 } 3923 3924 bs.clear(); 3925 us.clear(); 3926 3927 //test overflow trail surrogate 3928 us.put((char)0xDD01); us.put((char)0xDD0E); us.put((char)0xDD0E); 3929 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3930 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3931 3932 bs.limit(bs.position()); 3933 bs.position(0); 3934 us.limit(us.position()); 3935 us.position(0); 3936 3937 result = encoder.encode(us, bs, true); 3938 3939 if (!result.isError() && !result.isOverflow()) { 3940 errln("Error while encoding UTF32LE (3) should have occurred."); 3941 } 3942 3943 bs.clear(); 3944 us.clear(); 3945 3946 //test malform lead surrogate 3947 us.put((char)0xD90D); us.put((char)0xD90E); 3948 bs.put((byte)0x00); 3949 3950 bs.limit(bs.position()); 3951 bs.position(0); 3952 us.limit(us.position()); 3953 us.position(0); 3954 3955 try { 3956 smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false); 3957 errln("Exception while encoding UTF32LE (4) should have been thrown."); 3958 } catch (Exception ex) { 3959 } 3960 3961 bs.clear(); 3962 us.clear(); 3963 3964 //test overflow buffer 3965 us.put((char)0x0061); 3966 bs.put((byte)0x00); 3967 3968 bs.limit(bs.position()); 3969 bs.position(0); 3970 us.limit(us.position()); 3971 us.position(0); 3972 3973 try { 3974 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 3975 errln("Exception while encoding UTF32LE (5) should have been thrown."); 3976 } catch (Exception ex) { 3977 } 3978 3979 bs.clear(); 3980 us.clear(); 3981 3982 //test malform trail surrogate 3983 us.put((char)0xDD01); 3984 bs.put((byte)0x00); 3985 3986 bs.limit(bs.position()); 3987 bs.position(0); 3988 us.limit(us.position()); 3989 us.position(0); 3990 3991 try { 3992 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 3993 errln("Exception while encoding UTF32LE (6) should have been thrown."); 3994 } catch (Exception ex) { 3995 } 3996 } 3997 3998 //Test for charset UTF16LE to provide better code coverage 3999 @Test TestCharsetUTF16LE()4000 public void TestCharsetUTF16LE() { 4001 CoderResult result = CoderResult.UNDERFLOW; 4002 CharsetProvider provider = new CharsetProviderICU(); 4003 Charset cs = provider.charsetForName("UTF-16LE"); 4004 CharsetEncoder encoder = cs.newEncoder(); 4005 //CharsetDecoder decoder = cs.newDecoder(); 4006 4007 // Test for malform and change fromUChar32 for next call 4008 char u_pts1[] = { 4009 (char)0xD805, 4010 (char)0xDC01, (char)0xDC02, (char)0xDC03, 4011 (char)0xD901, (char)0xD902 4012 }; 4013 byte b_pts1[] = { 4014 (byte)0x00, 4015 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 4016 }; 4017 4018 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4019 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4020 4021 us.put(u_pts1); 4022 bs.put(b_pts1); 4023 4024 us.limit(1); 4025 us.position(0); 4026 bs.limit(1); 4027 bs.position(0); 4028 4029 result = encoder.encode(us, bs, true); 4030 4031 if (!result.isMalformed()) { 4032 // LE should not output BOM, so this should be malformed 4033 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4034 } 4035 4036 // Test for malform surrogate from previous buffer 4037 us.limit(4); 4038 us.position(1); 4039 bs.limit(7); 4040 bs.position(1); 4041 4042 result = encoder.encode(us, bs, true); 4043 4044 if (!result.isMalformed()) { 4045 errln("Error while encoding UTF-16LE (2) should have occured."); 4046 } 4047 4048 // Test for malform trail surrogate 4049 encoder.reset(); 4050 4051 us.limit(1); 4052 us.position(0); 4053 bs.limit(1); 4054 bs.position(0); 4055 4056 result = encoder.encode(us, bs, true); 4057 4058 us.limit(6); 4059 us.position(4); 4060 bs.limit(4); 4061 bs.position(1); 4062 4063 result = encoder.encode(us, bs, true); 4064 4065 if (!result.isMalformed()) { 4066 errln("Error while encoding UTF-16LE (3) should have occured."); 4067 } 4068 } 4069 4070 //provide better code coverage for the generic charset UTF32 4071 @Test TestCharsetUTF32()4072 public void TestCharsetUTF32() { 4073 CoderResult result = CoderResult.UNDERFLOW; 4074 CharsetProvider provider = new CharsetProviderICU(); 4075 Charset cs = provider.charsetForName("UTF-32"); 4076 CharsetDecoder decoder = cs.newDecoder(); 4077 CharsetEncoder encoder = cs.newEncoder(); 4078 4079 //start of decoding code coverage 4080 char us_array[] = { 4081 0x0000, 0x0000, 0x0000, 0x0000, 4082 }; 4083 4084 byte bs_array1[] = { 4085 (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF, 4086 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4087 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4088 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4089 }; 4090 4091 byte bs_array2[] = { 4092 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4093 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4094 }; 4095 4096 CharBuffer us = CharBuffer.allocate(us_array.length); 4097 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4098 4099 us.put(us_array); 4100 bs.put(bs_array1); 4101 4102 us.limit(us.position()); 4103 us.position(0); 4104 bs.limit(bs.position()); 4105 bs.position(0); 4106 4107 try { 4108 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4109 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4110 } catch (Exception ex) { 4111 } 4112 4113 decoder = cs.newDecoder(); 4114 4115 bs = ByteBuffer.allocate(bs_array2.length); 4116 bs.put(bs_array2); 4117 4118 us.limit(4); 4119 us.position(0); 4120 bs.limit(bs.position()); 4121 bs.position(0); 4122 4123 try { 4124 smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false); 4125 } catch (Exception ex) { 4126 // should recognize little endian BOM 4127 errln("Exception while decoding UTF32 charset (2) should not have been thrown."); 4128 } 4129 4130 //Test malform exception 4131 bs.clear(); 4132 us.clear(); 4133 4134 bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00); 4135 us.put((char)0x0000); 4136 4137 us.limit(us.position()); 4138 us.position(0); 4139 bs.limit(bs.position()); 4140 bs.position(0); 4141 4142 try { 4143 smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false); 4144 errln("Malform exception while decoding UTF32 charset (3) should have been thrown."); 4145 } catch (Exception ex) { 4146 } 4147 4148 //Test BOM testing 4149 bs.clear(); 4150 us.clear(); 4151 4152 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 4153 us.put((char)0x0000); 4154 4155 us.limit(us.position()); 4156 us.position(0); 4157 bs.limit(bs.position()); 4158 bs.position(0); 4159 4160 try { 4161 smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false); 4162 } catch (Exception ex) { 4163 // should recognize big endian BOM 4164 errln("Exception while decoding UTF32 charset (4) should not have been thrown."); 4165 } 4166 //end of decoding code coverage 4167 4168 //start of encoding code coverage 4169 us = CharBuffer.allocate(0x10); 4170 bs = ByteBuffer.allocate(0x10); 4171 4172 //test wite BOM overflow error 4173 us.put((char)0xDC01); 4174 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4175 4176 us.limit(us.position()); 4177 us.position(0); 4178 bs.limit(bs.position()); 4179 bs.position(0); 4180 4181 result = encoder.encode(us, bs, true); 4182 // must try to output BOM first for UTF-32 (not UTF-32BE or UTF-32LE) 4183 if (!result.isOverflow()) { 4184 errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred."); 4185 } 4186 4187 us.clear(); 4188 bs.clear(); 4189 4190 //test malform surrogate and store value in fromChar32 4191 us.put((char)0xD801); us.put((char)0xD802); 4192 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4193 4194 us.limit(us.position()); 4195 us.position(0); 4196 bs.limit(bs.position()); 4197 bs.position(0); 4198 4199 result = encoder.encode(us, bs, true); 4200 if (!result.isMalformed()) { 4201 errln("Malformed error while encoding UTF32 charset (2) should have occurred."); 4202 } 4203 4204 us.clear(); 4205 bs.clear(); 4206 4207 //test malform surrogate 4208 us.put((char)0x0000); us.put((char)0xD902); 4209 4210 us.limit(us.position()); 4211 us.position(0); 4212 bs.limit(bs.position()); 4213 bs.position(0); 4214 4215 result = encoder.encode(us, bs, true); 4216 if (!result.isOverflow()) { 4217 errln("Overflow error while encoding UTF32 charset (3) should have occurred."); 4218 } 4219 4220 us.clear(); 4221 bs.clear(); 4222 4223 //test malform surrogate 4224 encoder.reset(); 4225 us.put((char)0xD801); 4226 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4227 4228 us.limit(us.position()); 4229 us.position(0); 4230 bs.limit(bs.position()); 4231 bs.position(0); 4232 4233 result = encoder.encode(us, bs, true); 4234 if (!result.isMalformed()) { 4235 errln("Malform error while encoding UTF32 charset (4) should have occurred."); 4236 } 4237 4238 us.clear(); 4239 bs.clear(); 4240 4241 //test overflow surrogate 4242 us.put((char)0x0000); us.put((char)0xDDE1); us.put((char)0xD915); us.put((char)0xDDF2); 4243 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4244 4245 us.limit(us.position()); 4246 us.position(0); 4247 bs.limit(bs.position()); 4248 bs.position(0); 4249 4250 result = encoder.encode(us, bs, true); 4251 if (!result.isOverflow()) { 4252 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4253 } 4254 4255 us.clear(); 4256 bs.clear(); 4257 4258 //test malform surrogate 4259 encoder.reset(); 4260 us.put((char)0xDDE1); 4261 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4262 4263 us.limit(us.position()); 4264 us.position(0); 4265 bs.limit(bs.position()); 4266 bs.position(0); 4267 4268 result = encoder.encode(us, bs, true); 4269 if (!result.isMalformed()) { 4270 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4271 } 4272 //end of encoding code coverage 4273 } 4274 4275 //this method provides better code coverage decoding UTF32 LE/BE 4276 @Test TestDecodeUTF32LEBE()4277 public void TestDecodeUTF32LEBE() { 4278 CoderResult result = CoderResult.UNDERFLOW; 4279 CharsetProvider provider = new CharsetProviderICU(); 4280 CharsetDecoder decoder; 4281 CharBuffer us = CharBuffer.allocate(0x10); 4282 ByteBuffer bs = ByteBuffer.allocate(0x10); 4283 4284 //decode UTF32LE 4285 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4286 //test overflow buffer 4287 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4288 us.put((char)0x0000); 4289 4290 us.limit(us.position()); 4291 us.position(0); 4292 bs.limit(bs.position()); 4293 bs.position(0); 4294 4295 try { 4296 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4297 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4298 } catch (Exception ex) { 4299 } 4300 // test overflow buffer handling in CharsetDecoderICU 4301 bs.position(0); 4302 us.position(0); 4303 decoder.reset(); 4304 result = decoder.decode(bs, us, true); 4305 if (result.isOverflow()) { 4306 result = decoder.decode(bs, us, true); 4307 if (!result.isOverflow()) { 4308 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4309 } 4310 } else { 4311 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4312 } 4313 4314 us.clear(); 4315 bs.clear(); 4316 //test malform buffer 4317 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4318 us.put((char)0x0000); 4319 4320 us.limit(us.position()); 4321 us.position(0); 4322 bs.limit(bs.position()); 4323 bs.position(0); 4324 4325 try { 4326 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4327 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4328 } catch (Exception ex) { 4329 } 4330 4331 us.clear(); 4332 bs.clear(); 4333 //test malform buffer 4334 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4335 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4336 us.put((char)0x0000); 4337 4338 us.limit(us.position()); 4339 us.position(0); 4340 bs.limit(bs.position()); 4341 bs.position(0); 4342 4343 try { 4344 // must flush in order to exhibit malformed behavior 4345 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4346 errln("Malform exception while decoding UTF32LE (3) should have been thrown."); 4347 } catch (Exception ex) { 4348 } 4349 4350 us.clear(); 4351 bs.clear(); 4352 //test malform buffer 4353 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4354 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4355 us.put((char)0x0000); 4356 4357 us.limit(us.position()); 4358 us.position(0); 4359 bs.limit(bs.position()); 4360 bs.position(0); 4361 4362 try { 4363 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4364 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4365 } catch (Exception ex) { 4366 } 4367 4368 us.clear(); 4369 bs.clear(); 4370 //test overflow buffer 4371 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4372 bs.put((byte)0xDD); bs.put((byte)0xFF); bs.put((byte)0x10); bs.put((byte)0x00); 4373 us.put((char)0x0000); 4374 4375 us.limit(us.position()); 4376 us.position(0); 4377 bs.limit(bs.position()); 4378 bs.position(0); 4379 4380 try { 4381 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4382 errln("Overflow exception while decoding UTF32LE (5) should have been thrown."); 4383 } catch (Exception ex) { 4384 } 4385 //end of decode UTF32LE 4386 4387 bs.clear(); 4388 us.clear(); 4389 4390 //decode UTF32BE 4391 decoder = provider.charsetForName("UTF-32BE").newDecoder(); 4392 //test overflow buffer 4393 bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41); 4394 us.put((char)0x0000); 4395 4396 us.limit(us.position()); 4397 us.position(0); 4398 bs.limit(bs.position()); 4399 bs.position(0); 4400 4401 try { 4402 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4403 errln("Overflow exception while decoding UTF32BE (1) should have been thrown."); 4404 } catch (Exception ex) { 4405 } 4406 4407 bs.clear(); 4408 us.clear(); 4409 //test malform buffer 4410 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02); 4411 us.put((char)0x0000); 4412 4413 us.limit(us.position()); 4414 us.position(0); 4415 bs.limit(bs.position()); 4416 bs.position(0); 4417 4418 try { 4419 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4420 errln("Malform exception while decoding UTF32BE (2) should have been thrown."); 4421 } catch (Exception ex) { 4422 } 4423 4424 bs.clear(); 4425 us.clear(); 4426 //test malform buffer 4427 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4428 bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF); 4429 us.put((char)0x0000); 4430 4431 us.limit(us.position()); 4432 us.position(0); 4433 bs.limit(bs.position()); 4434 bs.position(0); 4435 4436 try { 4437 // must flush to exhibit malformed behavior 4438 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4439 errln("Malform exception while decoding UTF32BE (3) should have been thrown."); 4440 } catch (Exception ex) { 4441 } 4442 4443 bs.clear(); 4444 us.clear(); 4445 //test overflow buffer 4446 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4447 bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD); 4448 us.put((char)0x0000); 4449 4450 us.limit(us.position()); 4451 us.position(0); 4452 bs.limit(bs.position()); 4453 bs.position(0); 4454 4455 try { 4456 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4457 errln("Overflow exception while decoding UTF32BE (4) should have been thrown."); 4458 } catch (Exception ex) { 4459 } 4460 4461 bs.clear(); 4462 us.clear(); 4463 //test malform buffer 4464 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); 4465 us.put((char)0x0000); 4466 4467 us.limit(us.position()); 4468 us.position(0); 4469 bs.limit(bs.position()); 4470 bs.position(0); 4471 4472 try { 4473 // must flush to exhibit malformed behavior 4474 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4475 errln("Malform exception while decoding UTF32BE (5) should have been thrown."); 4476 } catch (Exception ex) { 4477 } 4478 //end of decode UTF32BE 4479 } 4480 4481 //provide better code coverage for UTF8 4482 @Test TestCharsetUTF8()4483 public void TestCharsetUTF8() { 4484 CoderResult result = CoderResult.UNDERFLOW; 4485 CharsetProvider provider = new CharsetProviderICU(); 4486 CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder(); 4487 CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder(); 4488 4489 CharBuffer us = CharBuffer.allocate(0x10); 4490 ByteBuffer bs = ByteBuffer.allocate(0x10); 4491 ByteBuffer bs2; 4492 CharBuffer us2; 4493 int limit_us; 4494 int limit_bs; 4495 4496 //encode and decode using read only buffer 4497 encoder.reset(); 4498 decoder.reset(); 4499 us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902); 4500 bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5); 4501 bs.put((byte)0x00); 4502 limit_us = us.position(); 4503 limit_bs = bs.position(); 4504 4505 us.limit(limit_us); 4506 us.position(0); 4507 bs.limit(limit_bs); 4508 bs.position(0); 4509 bs2 = bs.asReadOnlyBuffer(); 4510 us2 = us.asReadOnlyBuffer(); 4511 4512 result = decoder.decode(bs2, us, true); 4513 if (!result.isUnderflow() || !equals(us, us2)) { 4514 errln("Error while decoding UTF-8 (1) should not have occured."); 4515 } 4516 4517 us2.limit(limit_us); 4518 us2.position(0); 4519 bs.limit(limit_bs); 4520 bs.position(0); 4521 4522 result = encoder.encode(us2, bs, true); 4523 if (!result.isUnderflow() || !equals(bs, bs2)) { 4524 errln("Error while encoding UTF-8 (1) should not have occured."); 4525 } 4526 4527 us.clear(); 4528 bs.clear(); 4529 4530 //test overflow buffer while encoding 4531 //readonly buffer 4532 encoder.reset(); 4533 us.put((char)0x0081); us.put((char)0xEF65); 4534 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4535 limit_us = us.position(); 4536 us2 = us.asReadOnlyBuffer(); 4537 us2.limit(limit_us); 4538 us2.position(0); 4539 bs.limit(1); 4540 bs.position(0); 4541 result = encoder.encode(us2, bs, true); 4542 if (!result.isOverflow()) { 4543 errln("Overflow Error should have occured while encoding UTF-8 (2)."); 4544 } 4545 4546 encoder.reset(); 4547 4548 us2.limit(limit_us); 4549 us2.position(1); 4550 bs.limit(1); 4551 bs.position(0); 4552 result = encoder.encode(us2, bs, true); 4553 if (!result.isOverflow()) { 4554 errln("Overflow Error should have occured while encoding UTF-8 (3)."); 4555 } 4556 4557 encoder.reset(); 4558 4559 us2.limit(limit_us); 4560 us2.position(1); 4561 bs.limit(2); 4562 bs.position(0); 4563 result = encoder.encode(us2, bs, true); 4564 if (!result.isOverflow()) { 4565 errln("Overflow Error should have occured while encoding UTF-8 (4)."); 4566 } 4567 4568 encoder.reset(); 4569 4570 us2.limit(limit_us); 4571 us2.position(0); 4572 bs.limit(2); 4573 bs.position(0); 4574 result = encoder.encode(us2, bs, true); 4575 if (!result.isOverflow()) { 4576 errln("Overflow Error should have occured while encoding UTF-8 (5)."); 4577 } 4578 4579 //not readonly buffer 4580 encoder.reset(); 4581 4582 us.limit(limit_us); 4583 us.position(0); 4584 bs.limit(1); 4585 bs.position(0); 4586 result = encoder.encode(us, bs, true); 4587 if (!result.isOverflow()) { 4588 errln("Overflow Error should have occured while encoding UTF-8 (6)."); 4589 } 4590 4591 encoder.reset(); 4592 4593 us.limit(limit_us); 4594 us.position(0); 4595 bs.limit(3); 4596 bs.position(0); 4597 result = encoder.encode(us, bs, true); 4598 if (!result.isOverflow()) { 4599 errln("Overflow Error should have occured while encoding UTF-8 (7)."); 4600 } 4601 4602 encoder.reset(); 4603 4604 us.limit(limit_us); 4605 us.position(1); 4606 bs.limit(2); 4607 bs.position(0); 4608 result = encoder.encode(us, bs, true); 4609 if (!result.isOverflow()) { 4610 errln("Overflow Error should have occured while encoding UTF-8 (8)."); 4611 } 4612 4613 encoder.reset(); 4614 4615 us.limit(limit_us + 1); 4616 us.position(1); 4617 bs.limit(3); 4618 bs.position(0); 4619 result = encoder.encode(us, bs, true); 4620 if (!result.isOverflow()) { 4621 errln("Overflow Error should have occured while encoding UTF-8 (9)."); 4622 } 4623 4624 us.clear(); 4625 bs.clear(); 4626 4627 //test encoding 4 byte characters 4628 encoder.reset(); 4629 us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041); 4630 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4631 limit_us = us.position(); 4632 us2 = us.asReadOnlyBuffer(); 4633 us2.limit(limit_us); 4634 us2.position(0); 4635 bs.limit(1); 4636 bs.position(0); 4637 result = encoder.encode(us2, bs, true); 4638 if (!result.isOverflow()) { 4639 errln("Overflow Error should have occured while encoding UTF-8 (10)."); 4640 } 4641 4642 encoder.reset(); 4643 4644 us2.limit(limit_us); 4645 us2.position(0); 4646 bs.limit(2); 4647 bs.position(0); 4648 result = encoder.encode(us2, bs, true); 4649 if (!result.isOverflow()) { 4650 errln("Overflow Error should have occured while encoding UTF-8 (11)."); 4651 } 4652 4653 encoder.reset(); 4654 4655 us2.limit(limit_us); 4656 us2.position(0); 4657 bs.limit(3); 4658 bs.position(0); 4659 result = encoder.encode(us2, bs, true); 4660 if (!result.isOverflow()) { 4661 errln("Overflow Error should have occured while encoding UTF-8 (12)."); 4662 } 4663 4664 encoder.reset(); 4665 4666 us2.limit(limit_us); 4667 us2.position(0); 4668 bs.limit(4); 4669 bs.position(0); 4670 result = encoder.encode(us2, bs, true); 4671 if (!result.isOverflow()) { 4672 errln("Overflow Error should have occured while encoding UTF-8 (13)."); 4673 } 4674 4675 us.clear(); 4676 bs.clear(); 4677 4678 //decoding code coverage 4679 //test malform error 4680 decoder.reset(); 4681 bs.put((byte)0xC2); bs.put((byte)0xC2); 4682 us.put((char)0x0000); 4683 bs2 = bs.asReadOnlyBuffer(); 4684 4685 us.limit(1); 4686 us.position(0); 4687 bs2.limit(1); 4688 bs2.position(0); 4689 4690 result = decoder.decode(bs2, us, true); 4691 result = decoder.flush(us); 4692 if (!result.isMalformed()) { 4693 errln("Malform error should have occurred while decoding UTF-8 (1)."); 4694 } 4695 4696 us.limit(1); 4697 us.position(0); 4698 bs2.limit(1); 4699 bs2.position(0); 4700 4701 decoder.reset(); 4702 4703 result = decoder.decode(bs2, us, true); 4704 us.limit(1); 4705 us.position(0); 4706 bs2.limit(2); 4707 bs2.position(0); 4708 result = decoder.decode(bs2, us, true); 4709 if (!result.isMalformed()) { 4710 errln("Malform error should have occurred while decoding UTF-8 (2)."); 4711 } 4712 4713 us.clear(); 4714 bs.clear(); 4715 4716 //test overflow buffer 4717 bs.put((byte)0x01); bs.put((byte)0x41); 4718 us.put((char)0x0000); 4719 bs2 = bs.asReadOnlyBuffer(); 4720 us.limit(1); 4721 us.position(0); 4722 bs2.limit(2); 4723 bs2.position(0); 4724 4725 result = decoder.decode(bs2, us, true); 4726 if (!result.isOverflow()) { 4727 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4728 } 4729 4730 us.clear(); 4731 bs.clear(); 4732 4733 //test malform string 4734 decoder.reset(); 4735 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4736 us.put((char)0x0000); 4737 bs2 = bs.asReadOnlyBuffer(); 4738 us.limit(1); 4739 us.position(0); 4740 bs2.limit(4); 4741 bs2.position(0); 4742 4743 result = decoder.decode(bs2, us, true); 4744 if (!result.isMalformed()) { 4745 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4746 } 4747 4748 bs.clear(); 4749 4750 //test overflow 4751 decoder.reset(); 4752 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4753 bs2 = bs.asReadOnlyBuffer(); 4754 us.limit(1); 4755 us.position(0); 4756 bs2.limit(4); 4757 bs2.position(0); 4758 4759 result = decoder.decode(bs2, us, true); 4760 if (!result.isOverflow()) { 4761 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4762 } 4763 4764 //test overflow 4765 decoder.reset(); 4766 us.limit(2); 4767 us.position(0); 4768 bs2.limit(5); 4769 bs2.position(0); 4770 4771 result = decoder.decode(bs2, us, true); 4772 if (!result.isOverflow()) { 4773 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4774 } 4775 4776 //test overflow 4777 decoder.reset(); 4778 us.limit(1); 4779 us.position(0); 4780 bs.limit(5); 4781 bs.position(0); 4782 4783 result = decoder.decode(bs, us, true); 4784 if (!result.isOverflow()) { 4785 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4786 } 4787 4788 bs.clear(); 4789 4790 //test overflow 4791 decoder.reset(); 4792 bs.put((byte)0x41); bs.put((byte)0x42); 4793 us.limit(1); 4794 us.position(0); 4795 bs.limit(2); 4796 bs.position(0); 4797 4798 result = decoder.decode(bs, us, true); 4799 if (!result.isOverflow()) { 4800 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4801 } 4802 4803 } 4804 4805 //provide better code coverage for Charset UTF16 4806 @Test TestCharsetUTF16()4807 public void TestCharsetUTF16() { 4808 CoderResult result = CoderResult.UNDERFLOW; 4809 CharsetProvider provider = new CharsetProviderICU(); 4810 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4811 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4812 4813 CharBuffer us = CharBuffer.allocate(0x10); 4814 ByteBuffer bs = ByteBuffer.allocate(0x10); 4815 4816 //test flush buffer and malform string 4817 bs.put((byte)0xFF); 4818 us.put((char)0x0000); 4819 4820 us.limit(us.position()); 4821 us.position(0); 4822 bs.limit(bs.position()); 4823 bs.position(0); 4824 4825 result = decoder.decode(bs, us, true); 4826 result = decoder.flush(us); 4827 if (!result.isMalformed()) { 4828 errln("Malform error while decoding UTF-16 should have occurred."); 4829 } 4830 4831 us.clear(); 4832 bs.clear(); 4833 4834 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4835 4836 us.limit(1); 4837 us.position(0); 4838 bs.limit(4); 4839 bs.position(0); 4840 4841 result = encoder.encode(us, bs, true); 4842 us.limit(3); 4843 us.position(0); 4844 bs.limit(3); 4845 bs.position(0); 4846 result = encoder.encode(us, bs, true); 4847 if (!result.isOverflow()) { 4848 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4849 } 4850 4851 us.clear(); 4852 bs.clear(); 4853 4854 //test overflow buffer 4855 decoder.reset(); 4856 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4857 4858 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4859 4860 us.limit(0); 4861 us.position(0); 4862 bs.limit(3); 4863 bs.position(0); 4864 4865 result = decoder.decode(bs, us, true); 4866 if (!result.isOverflow()) { 4867 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4868 } 4869 } 4870 4871 //provide better code coverage for Charset ISO-2022-KR 4872 @Test TestCharsetISO2022KR()4873 public void TestCharsetISO2022KR() { 4874 CoderResult result = CoderResult.UNDERFLOW; 4875 CharsetProvider provider = new CharsetProviderICU(); 4876 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4877 4878 byte bytearray[] = { 4879 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4880 }; 4881 char chararray[] = { 4882 (char)0x0041 4883 }; 4884 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4885 CharBuffer cb = CharBuffer.wrap(chararray); 4886 4887 result = decoder.decode(bb, cb, true); 4888 4889 if (!result.isOverflow()) { 4890 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4891 } 4892 } 4893 4894 //provide better code coverage for Charset ISO-2022-JP 4895 @Test TestCharsetISO2022JP()4896 public void TestCharsetISO2022JP() { 4897 CoderResult result = CoderResult.UNDERFLOW; 4898 CharsetProvider provider = new CharsetProviderICU(); 4899 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4900 4901 byte bytearray[] = { 4902 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4903 }; 4904 char chararray[] = { 4905 (char)0x000A 4906 }; 4907 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4908 CharBuffer cb = CharBuffer.wrap(chararray); 4909 4910 result = decoder.decode(bb, cb, true); 4911 4912 if (!result.isOverflow()) { 4913 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4914 } 4915 } 4916 4917 //provide better code coverage for Charset ASCII 4918 @Test TestCharsetASCII()4919 public void TestCharsetASCII() { 4920 CoderResult result = CoderResult.UNDERFLOW; 4921 CharsetProvider provider = new CharsetProviderICU(); 4922 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4923 4924 byte bytearray[] = { 4925 (byte)0x41 4926 }; 4927 char chararray[] = { 4928 (char)0x0041 4929 }; 4930 4931 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4932 CharBuffer cb = CharBuffer.wrap(chararray); 4933 4934 result = decoder.decode(bb, cb, true); 4935 result = decoder.flush(cb); 4936 4937 if (result.isError()) { 4938 errln("Error occurred while decoding US-ASCII."); 4939 } 4940 } 4941 4942 // provide better code coverage for Charset Callbacks 4943 /* Different aspects of callbacks are being tested including using different context available */ 4944 @Test TestCharsetCallbacks()4945 public void TestCharsetCallbacks() { 4946 CoderResult result = CoderResult.UNDERFLOW; 4947 CharsetProvider provider = new CharsetProviderICU(); 4948 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4949 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4950 4951 String context3[] = { 4952 "i", 4953 "J" 4954 }; 4955 4956 // Testing encoder escape callback 4957 String context1[] = { 4958 "J", 4959 "C", 4960 "D", 4961 null 4962 }; 4963 char chararray[] = { 4964 (char)0xd122 4965 }; 4966 ByteBuffer bb = ByteBuffer.allocate(20); 4967 CharBuffer cb = CharBuffer.wrap(chararray); 4968 4969 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 4970 for (int i = 0; i < context1.length; i++) { 4971 encoder.reset(); 4972 cb.position(0); 4973 bb.position(0); 4974 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 4975 4976 result = encoder.encode(cb, bb, true); 4977 if (result.isError()) { 4978 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4979 } 4980 } 4981 4982 // Testing encoder skip callback 4983 for (int i = 0; i < context3.length; i++) { 4984 encoder.reset(); 4985 cb.position(0); 4986 bb.position(0); 4987 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 4988 4989 result = encoder.encode(cb, bb, true); 4990 if (result.isError() && i == 0) { 4991 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4992 } 4993 } 4994 4995 // Testing encoder sub callback 4996 for (int i = 0; i < context3.length; i++) { 4997 encoder.reset(); 4998 cb.position(0); 4999 bb.position(0); 5000 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 5001 5002 result = encoder.encode(cb, bb, true); 5003 if (result.isError() && i == 0) { 5004 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5005 } 5006 } 5007 5008 // Testing decoder escape callback 5009 String context2[] = { 5010 "X", 5011 "C", 5012 "D", 5013 null 5014 }; 5015 byte bytearray[] = { 5016 (byte)0x1b, (byte)0x2e, (byte)0x43 5017 }; 5018 bb = ByteBuffer.wrap(bytearray); 5019 cb = CharBuffer.allocate(20); 5020 5021 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 5022 for (int i = 0; i < context2.length; i++) { 5023 decoder.reset(); 5024 cb.position(0); 5025 bb.position(0); 5026 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 5027 5028 result = decoder.decode(bb, cb, true); 5029 if (result.isError()) { 5030 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5031 } 5032 } 5033 5034 // Testing decoder skip callback 5035 for (int i = 0; i < context3.length; i++) { 5036 decoder.reset(); 5037 cb.position(0); 5038 bb.position(0); 5039 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5040 result = decoder.decode(bb, cb, true); 5041 if (!result.isError()) { 5042 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5043 } 5044 } 5045 } 5046 5047 // Testing invalid input exceptions 5048 @Test TestInvalidInput()5049 public void TestInvalidInput() { 5050 CharsetProvider provider = new CharsetProviderICU(); 5051 Charset charset = provider.charsetForName("iso-2022-jp"); 5052 CharsetEncoder encoder = charset.newEncoder(); 5053 CharsetDecoder decoder = charset.newDecoder(); 5054 5055 try { 5056 encoder.encode(CharBuffer.allocate(10), null, true); 5057 errln("Illegal argument exception should have been thrown due to null target."); 5058 } catch (CoderMalfunctionError err) { 5059 // Java 16 updated handling of Exception thrown by encodeLoop(CharBuffer,ByteBuffer). 5060 // Previously when encodeLoop is called with null input/output buffer, it throws 5061 // IllegalArgumentException, and Java CharsetEncoder does not catch the exception. 5062 // In Java 16, a runtime exception thrown by encodeLoop implementation is caught 5063 // and wrapped by CoderMalfunctionError. This block is required because CoderMalfunctionError 5064 // is not an Exception. 5065 } catch (Exception ex) { 5066 // IllegalArgumentException is thrown by encodeLoop(CharBuffer,ByteBuffer) implementation 5067 // is not wrapped by CharsetEncoder up to Java 15. 5068 } 5069 5070 try { 5071 decoder.decode(ByteBuffer.allocate(10), null, true); 5072 errln("Illegal argument exception should have been thrown due to null target."); 5073 } catch (CoderMalfunctionError err) { 5074 } catch (Exception ex) { 5075 } 5076 } 5077 5078 // Test java canonical names 5079 @Test TestGetICUJavaCanonicalNames()5080 public void TestGetICUJavaCanonicalNames() { 5081 // Ambiguous charset name. 5082 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5083 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5084 if (javaCName == null || icuCName == null) { 5085 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5086 } 5087 5088 } 5089 5090 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5091 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 5092 @Test TestCharsetTestData()5093 public void TestCharsetTestData() { 5094 CoderResult result = CoderResult.UNDERFLOW; 5095 String charsetName = "test4"; 5096 CharsetProvider provider = new CharsetProviderICU(); 5097 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5098 this.getClass().getClassLoader()); 5099 CharsetEncoder encoder = charset.newEncoder(); 5100 CharsetDecoder decoder = charset.newDecoder(); 5101 5102 byte bytearray[] = { 5103 0x01, 0x02, 0x03, 0x0a, 5104 0x01, 0x02, 0x03, 0x0b, 5105 0x01, 0x02, 0x03, 0x0d, 5106 }; 5107 5108 // set the callback for overflow errors 5109 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5110 5111 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5112 CharBuffer cb = CharBuffer.allocate(10); 5113 5114 bb.limit(4); 5115 cb.limit(1); // Overflow should occur and is expected 5116 result = decoder.decode(bb, cb, false); 5117 if (result.isError()) { 5118 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5119 } 5120 5121 bb.limit(8); 5122 result = decoder.decode(bb, cb, false); 5123 if (result.isError()) { 5124 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5125 } 5126 5127 bb.limit(12); 5128 result = decoder.decode(bb, cb, true); 5129 if (result.isError()) { 5130 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5131 } 5132 5133 char chararray[] = { 5134 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5135 0xD940, /* first half of \U00060006 or \U00060007 */ 5136 0xDC07/* second half of \U00060007 */ 5137 }; 5138 5139 cb = CharBuffer.wrap(chararray); 5140 bb = ByteBuffer.allocate(10); 5141 5142 bb.limit(2); 5143 cb.limit(4); 5144 result = encoder.encode(cb, bb, false); 5145 if (result.isError()) { 5146 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5147 } 5148 cb.limit(5); 5149 result = encoder.encode(cb, bb, false); 5150 if (result.isError()) { 5151 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5152 } 5153 cb.limit(6); 5154 result = encoder.encode(cb, bb, true); 5155 if (!result.isError()) { 5156 errln("Error should have occurred while encoding: " + charsetName); 5157 } 5158 } 5159 5160 /* Round trip test of SCSU converter*/ 5161 @Test TestSCSUConverter()5162 public void TestSCSUConverter(){ 5163 byte allFeaturesSCSU[]={ 5164 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5165 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5166 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5167 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5168 }; 5169 5170 char allFeaturesUTF16[]={ 5171 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5172 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5173 0x01df, 0xf000, 0xdbff, 0xdfff 5174 }; 5175 5176 5177 char germanUTF16[]={ 5178 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5179 }; 5180 5181 byte germanSCSU[]={ 5182 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5183 }; 5184 5185 char russianUTF16[]={ 5186 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5187 }; 5188 5189 byte russianSCSU[]={ 5190 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5191 }; 5192 5193 char japaneseUTF16[]={ 5194 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5195 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5196 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5197 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5198 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5199 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5200 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 5201 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 5202 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5203 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5204 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5205 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5206 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5207 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5208 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5209 }; 5210 5211 // SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 5212 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5213 byte japaneseSCSU[]={ 5214 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5215 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5216 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5217 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5218 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5219 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5220 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5221 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 0xaa, 5222 (byte)0x8f, 0x0e, 0x61, 0x1b, (byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5223 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5224 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5225 (byte)0xcb, (byte)0x82 5226 }; 5227 5228 CharsetProviderICU cs = new CharsetProviderICU(); 5229 CharsetICU charset = (CharsetICU)cs.charsetForName("scsu"); 5230 CharsetDecoder decode = charset.newDecoder(); 5231 CharsetEncoder encode = charset.newEncoder(); 5232 5233 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5234 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5235 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5236 5237 for(int i=0;i<4;i++){ 5238 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5239 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5240 5241 try{ 5242 // Decoding 5243 CharBuffer decoderResult = decode.decode(decoderBuffer); 5244 encoderBuffer.position(0); 5245 if(!decoderResult.equals(encoderBuffer)){ 5246 errln("Error occured while decoding "+ charset.name()); 5247 } 5248 // Encoding 5249 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5250 // RoundTrip Test 5251 ByteBuffer roundTrip = encoderResult; 5252 CharBuffer roundTripResult = decode.decode(roundTrip); 5253 encoderBuffer.position(0); 5254 if(!roundTripResult.equals(encoderBuffer)){ 5255 errln("Error occured while encoding "+ charset.name()); 5256 } 5257 // Test overflow for code coverage reasons 5258 if (i == 0) { 5259 ByteBuffer test = encoderResult; 5260 test.position(0); 5261 CharBuffer smallBuffer = CharBuffer.allocate(11); 5262 decode.reset(); 5263 CoderResult status = decode.decode(test, smallBuffer, true); 5264 if (status != CoderResult.OVERFLOW) { 5265 errln("Overflow buffer error should have been thrown."); 5266 } 5267 } 5268 }catch(Exception e){ 5269 errln("Exception while converting SCSU thrown: " + e); 5270 } 5271 } 5272 5273 /* Provide better code coverage */ 5274 /* testing illegal codepoints */ 5275 CoderResult illegalResult = CoderResult.UNDERFLOW; 5276 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5277 5278 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5279 decode.reset(); 5280 illegalResult = decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5281 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5282 errln("Malformed error should have been returned for decoder " + charset.name()); 5283 } 5284 /* code coverage test from nucnvtst.c in ICU4C */ 5285 CoderResult ccResult = CoderResult.UNDERFLOW; 5286 int CCBufSize = 120 * 10; 5287 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5288 CharBuffer test = CharBuffer.allocate(CCBufSize); 5289 String [] ccSrc = { 5290 "\ud800\udc00", /* smallest surrogate*/ 5291 "\ud8ff\udcff", 5292 "\udBff\udFff", /* largest surrogate pair*/ 5293 "\ud834\udc00", 5294 //"\U0010FFFF", 5295 "Hello \u9292 \u9192 World!", 5296 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5297 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5298 5299 "\u0648\u06c8", /* catch missing reset*/ 5300 "\u0648\u06c8", 5301 5302 "\u4444\uE001", /* lowest quotable*/ 5303 "\u4444\uf2FF", /* highest quotable*/ 5304 "\u4444\uf188\u4444", 5305 "\u4444\uf188\uf288", 5306 "\u4444\uf188abc\u0429\uf288", 5307 "\u9292\u2222", 5308 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5309 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5310 "Hello World!123456", 5311 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5312 5313 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5314 "abc\u4411d", /* uses SQU*/ 5315 "abc\u4411\u4412d",/* uses SCU*/ 5316 "abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5317 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5318 "\u9292\u2222", 5319 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5320 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5321 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5322 5323 "", /* empty input*/ 5324 "\u0000", /* smallest BMP character*/ 5325 "\uFFFF", /* largest BMP character*/ 5326 5327 /* regression tests*/ 5328 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5329 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5330 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5331 "\u0041\u00df\u0401\u015f", 5332 "\u9066\u2123abc", 5333 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5334 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5335 }; 5336 for (int i = 0; i < ccSrc.length; i++) { 5337 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5338 encode.reset(); 5339 decode.reset(); 5340 trgt.clear(); 5341 test.clear(); 5342 ccResult = encode.encode(ubuf, trgt, true); 5343 if (ccResult.isError()) { 5344 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5345 } else { 5346 trgt.limit(trgt.position()); 5347 trgt.position(0); 5348 ccResult = decode.decode(trgt, test, true); 5349 if (ccResult.isError()) { 5350 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5351 } else { 5352 ubuf.position(0); 5353 test.limit(test.position()); 5354 test.position(0); 5355 if (!equals(test, ubuf)) { 5356 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5357 } 5358 } 5359 } 5360 } 5361 5362 /* Monkey test */ 5363 { 5364 char[] monkeyIn = { 5365 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5366 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5367 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 5368 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5369 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5370 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5371 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5372 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5373 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5374 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5375 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5376 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5377 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5378 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5379 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5380 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5381 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5382 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5383 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5384 /* test non-BMP code points */ 5385 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5386 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5387 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5388 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5389 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5390 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5391 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 5392 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5393 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5394 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5395 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5396 5397 5398 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5399 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5400 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5401 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5402 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5403 }; 5404 encode.reset(); 5405 decode.reset(); 5406 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5407 try { 5408 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5409 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5410 5411 } catch (Exception ex) { 5412 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5413 } 5414 } 5415 // Test malformed 5416 { 5417 char[] malformedSequence = { 5418 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5419 }; 5420 encode.reset(); 5421 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5422 5423 try { 5424 encode.encode(malformedSrc); 5425 errln("Malformed error should have thrown an exception."); 5426 } catch (Exception ex) { 5427 } 5428 } 5429 // Test overflow buffer 5430 { 5431 ByteBuffer overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5432 int sizes[] = { 8, 2, 11 }; 5433 for (int i = 0; i < sizes.length; i++) { 5434 try { 5435 decode.reset(); 5436 overflowTest.position(0); 5437 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5438 errln("Buffer overflow exception should have been thrown."); 5439 } catch (BufferOverflowException ex) { 5440 } catch (Exception ex) { 5441 errln("Buffer overflow exception should have been thrown."); 5442 } 5443 } 5444 5445 } 5446 } 5447 5448 /* Test for BOCU1 converter*/ 5449 @Test TestBOCU1Converter()5450 public void TestBOCU1Converter(){ 5451 char expected[]={ 5452 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5453 0x0063, 0x0061, 0x000D, 0x000A, 5454 5455 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5456 0x0930, 0x0020, 0x0918, 0x0909, 5457 5458 0x3086, 0x304D, 0x0020, 0x3053, // 16 5459 0x4000, 0x4E00, 0x7777, 0x0020, 5460 5461 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5462 0x0020, 0xD7A3, 0xDC00, 0xD800, 5463 5464 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5465 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5466 5467 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5468 0x0009 5469 }; 5470 5471 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5472 (byte) 0xFB, 5473 (byte) 0xEE, 5474 0x28, // from source offset 0 5475 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5476 (byte) 0xB3, 5477 (byte) 0xB1, 5478 0x0D, 5479 0x0A, 5480 5481 0x20, // from 8 5482 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5483 0x20, 0x68, 5484 0x59, 5485 5486 (byte) 0xF9, 5487 0x28, // from 16 5488 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5489 (byte) 0xD0, 0x33, 0x20, 5490 5491 (byte) 0xFA, 5492 (byte) 0x83, // from 24 5493 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5494 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5495 5496 (byte) 0xF9, 5497 (byte) 0xA2, // from 32 5498 (byte) 0xFC, 0x10, 0x3E, (byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5499 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5500 5501 0x01, /// from 41 5502 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5503 }; 5504 5505 CharsetProviderICU cs = new CharsetProviderICU(); 5506 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5507 CharsetDecoder decode = charset.newDecoder(); 5508 CharsetEncoder encode = charset.newEncoder(); 5509 5510 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5511 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5512 try{ 5513 // Decoding 5514 CharBuffer decoderResult = decode.decode(decoderBuffer); 5515 5516 encoderBuffer.position(0); 5517 if(!decoderResult.equals(encoderBuffer)){ 5518 errln("Error occured while decoding "+ charset.name()); 5519 } 5520 // Encoding 5521 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5522 // RoundTrip Test 5523 ByteBuffer roundTrip = encoderResult; 5524 CharBuffer roundTripResult = decode.decode(roundTrip); 5525 5526 encoderBuffer.position(0); 5527 if(!roundTripResult.equals(encoderBuffer)){ 5528 errln("Error occured while encoding "+ charset.name()); 5529 } 5530 }catch(Exception e){ 5531 errln("Exception while converting BOCU-1 thrown: " + e); 5532 } 5533 } 5534 5535 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5536 @Test TestICUCanonicalNameConsistency()5537 public void TestICUCanonicalNameConsistency() { 5538 String[] alias = { 5539 "KSC_5601" 5540 }; 5541 String[] expected = { 5542 "windows-949-2000" 5543 }; 5544 5545 for (int i = 0; i < alias.length; i++) { 5546 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5547 if (!name.equals(expected[i])) { 5548 errln("The ICU canonical name in ICU4J does not match that in ICU4C. Result: " + name + "Expected: " + expected[i]); 5549 } 5550 } 5551 } 5552 5553 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5554 @Test TestCharsetICUCodeCoverage()5555 public void TestCharsetICUCodeCoverage() { 5556 CharsetProviderICU provider = new CharsetProviderICU(); 5557 5558 if (provider.charsetForName("UTF16", null) != null) { 5559 errln("charsetForName should have returned a null"); 5560 } 5561 5562 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5563 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5564 } 5565 5566 try { 5567 Charset testCharset = CharsetICU.forNameICU("bogus"); 5568 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5569 } catch (UnsupportedCharsetException ex) { 5570 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5571 } 5572 5573 Charset charset = provider.charsetForName("UTF16"); 5574 5575 try { 5576 ((CharsetICU)charset).getUnicodeSet(null, 0); 5577 } catch (IllegalArgumentException ex) { 5578 return; 5579 } 5580 errln("IllegalArgumentException should have been thrown."); 5581 } 5582 5583 @Test TestCharsetLMBCS()5584 public void TestCharsetLMBCS() { 5585 String []lmbcsNames = { 5586 "LMBCS-1", 5587 "LMBCS-2", 5588 "LMBCS-3", 5589 "LMBCS-4", 5590 "LMBCS-5", 5591 "LMBCS-6", 5592 "LMBCS-8", 5593 "LMBCS-11", 5594 "LMBCS-16", 5595 "LMBCS-17", 5596 "LMBCS-18", 5597 "LMBCS-19" 5598 }; 5599 5600 char[] src = { 5601 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5602 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5603 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5604 }; 5605 CharBuffer cbInput = CharBuffer.wrap(src); 5606 5607 CharsetProviderICU provider = new CharsetProviderICU(); 5608 5609 for (int i = 0; i < lmbcsNames.length; i++) { 5610 Charset charset = provider.charsetForName(lmbcsNames[i]); 5611 if (charset == null) { 5612 errln("Unable to create LMBCS charset: " + lmbcsNames[i]); 5613 return; 5614 } 5615 CharsetEncoder encoder = charset.newEncoder(); 5616 CharsetDecoder decoder = charset.newDecoder(); 5617 5618 try { 5619 cbInput.position(0); 5620 ByteBuffer bbTmp = encoder.encode(cbInput); 5621 CharBuffer cbOutput = decoder.decode(bbTmp); 5622 5623 if (!equals(cbInput, cbOutput)) { 5624 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5625 } 5626 } catch (Exception ex) { 5627 if (i >= 8) { 5628 /* Expected exceptions */ 5629 continue; 5630 } 5631 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5632 } 5633 5634 } 5635 5636 // Test malformed 5637 CoderResult malformedResult = CoderResult.UNDERFLOW; 5638 byte[] malformedBytes = { 5639 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5640 }; 5641 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5642 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5643 int[] malformedLimits = { 5644 2, 6 5645 }; 5646 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5647 for (int n = 0; n < malformedLimits.length; n++) { 5648 malformedDecoderTest.reset(); 5649 5650 malformedSrc.position(0); 5651 malformedSrc.limit(malformedLimits[n]); 5652 5653 malformedTrgt.clear(); 5654 5655 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5656 if (!malformedResult.isMalformed()) { 5657 errln("Malformed error should have resulted."); 5658 } 5659 } 5660 } 5661 5662 /* 5663 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5664 * Since there is no concept of ambiguous converters in ICU4J 5665 * this test is merely for code coverage reasons. 5666 */ 5667 @Test TestAmbiguousConverter()5668 public void TestAmbiguousConverter() { 5669 byte [] inBytes = { 5670 0x61, 0x5b, 0x5c 5671 }; 5672 ByteBuffer src = ByteBuffer.wrap(inBytes); 5673 CharBuffer trgt = CharBuffer.allocate(20); 5674 5675 CoderResult result = CoderResult.UNDERFLOW; 5676 CharsetProviderICU provider = new CharsetProviderICU(); 5677 String[] names = CharsetProviderICU.getAllNames(); 5678 5679 for (int i = 0; i < names.length; i++) { 5680 Charset charset = provider.charsetForName(names[i]); 5681 if (charset == null) { 5682 /* We don't care about any failures because not all converters are available. */ 5683 continue; 5684 } 5685 CharsetDecoder decoder = charset.newDecoder(); 5686 5687 src.position(0); 5688 trgt.clear(); 5689 5690 result = decoder.decode(src, trgt, true); 5691 if (result.isError()) { 5692 /* We don't care about any failures. */ 5693 continue; 5694 } 5695 } 5696 } 5697 5698 @Test TestIsFixedWidth()5699 public void TestIsFixedWidth(){ 5700 String[] fixedWidth = { 5701 "US-ASCII", 5702 "UTF32", 5703 "ibm-5478_P100-1995" 5704 }; 5705 5706 String[] notFixedWidth = { 5707 "GB18030", 5708 "UTF8", 5709 "windows-949-2000", 5710 "UTF16" 5711 }; 5712 CharsetProvider provider = new CharsetProviderICU(); 5713 Charset charset; 5714 5715 for (int i = 0; i < fixedWidth.length; i++) { 5716 charset = provider.charsetForName(fixedWidth[i]); 5717 5718 if (!((CharsetICU)charset).isFixedWidth()) { 5719 errln(fixedWidth[i] + " is a fixedWidth charset but returned false."); 5720 } 5721 } 5722 5723 for (int i = 0; i < notFixedWidth.length; i++) { 5724 charset = provider.charsetForName(notFixedWidth[i]); 5725 5726 if (((CharsetICU)charset).isFixedWidth()) { 5727 errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true."); 5728 } 5729 } 5730 } 5731 5732 @Test TestBytesLengthForString()5733 public void TestBytesLengthForString() { 5734 CharsetProviderICU provider = new CharsetProviderICU(); 5735 String[] charsets = { 5736 "windows-949-2000", 5737 "ibm-1047_P100-1995,swaplfnl", 5738 "ibm-930_P120-1999", 5739 "ISCII,version=0", 5740 "ISO_2022,locale=ko,version=0" 5741 }; 5742 5743 int[] expected = { 5744 40, 5745 20, 5746 80, /* changed from 60 to 80 to reflect the updates by #9205 */ 5747 80, 5748 160 5749 }; 5750 5751 int stringLength = 10; 5752 int length; 5753 int maxCharSize; 5754 5755 for (int i = 0; i < charsets.length; i++) { 5756 maxCharSize = (int)provider.charsetForName(charsets[i]).newEncoder().maxBytesPerChar(); 5757 length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize); 5758 5759 if (length != expected[i]) { 5760 errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length); 5761 } 5762 } 5763 } 5764 5765 /* 5766 * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when 5767 * an unmappable character occurs. 5768 * Ticket #8729 5769 */ 5770 @Test TestCharsetASCII8859BufferHandling()5771 public void TestCharsetASCII8859BufferHandling() { 5772 String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n"; 5773 String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. I spoke to the store operator (Ram Reddi –=|\r\n"; 5774 5775 String charsetNames[] = { 5776 "ASCII", 5777 "ISO-8859-1" 5778 }; 5779 5780 CoderResult result = CoderResult.UNDERFLOW; 5781 5782 CharsetEncoder encoder; 5783 5784 ByteBuffer outBuffer = ByteBuffer.allocate(500); 5785 CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length()); 5786 charBuffer.put(firstLine); 5787 charBuffer.put(secondLine); 5788 charBuffer.flip(); 5789 5790 for (int i = 0; i < charsetNames.length; i++) { 5791 encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder(); 5792 5793 charBuffer.position(firstLine.length()); 5794 CharBuffer charBufferSlice = charBuffer.slice(); 5795 charBufferSlice.limit(secondLine.length() - 2); 5796 5797 5798 try { 5799 result = encoder.encode(charBufferSlice, outBuffer, false); 5800 if (!result.isUnmappable()) { 5801 errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result); 5802 } 5803 } catch (IllegalArgumentException ex) { 5804 errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]); 5805 } 5806 } 5807 } 5808 5809 /* 5810 * When converting with the String method getBytes(), buffer overflow exception is thrown because 5811 * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J 5812 * side to match what the Java method is expecting. The ICU4C size will be left unchanged. 5813 * Ticket #9205 5814 */ 5815 @Test TestBufferOverflowErrorUsingJavagetBytes()5816 public void TestBufferOverflowErrorUsingJavagetBytes() { 5817 String charsetName = "ibm-5035"; 5818 String testCase = "\u7d42"; 5819 5820 try { 5821 testCase.getBytes(charsetName); 5822 } catch (Exception ex) { 5823 errln("Error calling getBytes(): " + ex); 5824 } 5825 5826 } 5827 5828 // Test that all code points which have the default ignorable Unicode property 5829 // are ignored if they have no mapping. 5830 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) 5831 // in CharsetCallback.java should be updated. 5832 // Keep in sync with ICU4C intltest/convtest.cpp. 5833 @Test TestDefaultIgnorableCallback()5834 public void TestDefaultIgnorableCallback() { 5835 String cnv_name = "euc-jp-2007"; 5836 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5837 String pattern_not_ignorable = 5838 "[[:^Default_Ignorable_Code_Point:]" + 5839 // For test performance, skip large ranges that will likely remain unassigned 5840 // for a long time, and private use code points. 5841 "-[\\U00040000-\\U000DFFFF]-[:Co:]" + 5842 "]"; 5843 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5844 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5845 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5846 5847 // set callback for the converter 5848 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5849 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5850 5851 // test ignorable code points are ignored 5852 UnicodeSetIterator iter = new UnicodeSetIterator(set_ignorable); 5853 while (iter.next()) { 5854 encoder.reset(); 5855 int c = iter.codepoint; 5856 try { 5857 if(encoder.encode(CharBuffer.wrap(Character.toChars(c))).limit() > 0) { 5858 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(c)); 5859 } 5860 } catch (Exception ex) { 5861 errln("Error received converting +" + Integer.toHexString(c)); 5862 } 5863 } 5864 5865 // test non-ignorable code points are not ignored 5866 iter.reset(set_not_ignorable); 5867 while (iter.next()) { 5868 encoder.reset(); 5869 int c = iter.codepoint; 5870 try { 5871 if(encoder.encode(CharBuffer.wrap(Character.toChars(c))).limit() == 0) { 5872 errln("Callback should not have ignored: U+" + Integer.toHexString(c)); 5873 } 5874 } catch (Exception ex) { 5875 errln("Error received converting U+" + Integer.toHexString(c)); 5876 } 5877 } 5878 } 5879 } 5880