1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderResult; 20 import java.nio.charset.CodingErrorAction; 21 import java.nio.charset.UnsupportedCharsetException; 22 import java.nio.charset.spi.CharsetProvider; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Iterator; 26 import java.util.MissingResourceException; 27 import java.util.Set; 28 import java.util.SortedMap; 29 30 import org.junit.Test; 31 import org.junit.runner.RunWith; 32 import org.junit.runners.JUnit4; 33 34 import com.ibm.icu.charset.CharsetCallback; 35 import com.ibm.icu.charset.CharsetDecoderICU; 36 import com.ibm.icu.charset.CharsetEncoderICU; 37 import com.ibm.icu.charset.CharsetICU; 38 import com.ibm.icu.charset.CharsetProviderICU; 39 import com.ibm.icu.dev.test.TestFmwk; 40 import com.ibm.icu.text.UTF16; 41 import com.ibm.icu.text.UnicodeSet; 42 43 @RunWith(JUnit4.class) 44 public class TestCharset extends TestFmwk { 45 @Test TestUTF16Converter()46 public void TestUTF16Converter(){ 47 CharsetProvider icu = new CharsetProviderICU(); 48 Charset cs1 = icu.charsetForName("UTF-16BE"); 49 CharsetEncoder e1 = cs1.newEncoder(); 50 CharsetDecoder d1 = cs1.newDecoder(); 51 52 Charset cs2 = icu.charsetForName("UTF-16LE"); 53 CharsetEncoder e2 = cs2.newEncoder(); 54 CharsetDecoder d2 = cs2.newDecoder(); 55 56 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 57 CharBuffer us = CharBuffer.allocate(0xFF*2); 58 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 59 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 60 for(int j=0;j<0xFF; j++){ 61 int c = i+j; 62 63 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 64 continue; 65 } 66 67 if(c>0xFFFF){ 68 char lead = UTF16.getLeadSurrogate(c); 69 char trail = UTF16.getTrailSurrogate(c); 70 if(!UTF16.isLeadSurrogate(lead)){ 71 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 72 continue; 73 } 74 if(!UTF16.isTrailSurrogate(trail)){ 75 errln("trail is not trail!"+trail); 76 continue; 77 } 78 us.put(lead); 79 us.put(trail); 80 bs1.put((byte)(lead>>8)); 81 bs1.put((byte)(lead&0xFF)); 82 bs1.put((byte)(trail>>8)); 83 bs1.put((byte)(trail&0xFF)); 84 85 bs2.put((byte)(lead&0xFF)); 86 bs2.put((byte)(lead>>8)); 87 bs2.put((byte)(trail&0xFF)); 88 bs2.put((byte)(trail>>8)); 89 }else{ 90 91 if(c<0xFF){ 92 bs1.put((byte)0x00); 93 bs1.put((byte)(c)); 94 bs2.put((byte)(c)); 95 bs2.put((byte)0x00); 96 }else{ 97 bs1.put((byte)(c>>8)); 98 bs1.put((byte)(c&0xFF)); 99 100 bs2.put((byte)(c&0xFF)); 101 bs2.put((byte)(c>>8)); 102 } 103 us.put((char)c); 104 } 105 } 106 107 108 us.limit(us.position()); 109 us.position(0); 110 if(us.length()==0){ 111 continue; 112 } 113 114 115 bs1.limit(bs1.position()); 116 bs1.position(0); 117 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 118 //newBS.put((byte)0xFE); 119 //newBS.put((byte)0xFF); 120 newBS.put(bs1); 121 bs1.position(0); 122 smBufDecode(d1, "UTF-16", bs1, us); 123 smBufEncode(e1, "UTF-16", us, newBS); 124 125 bs2.limit(bs2.position()); 126 bs2.position(0); 127 newBS.clear(); 128 //newBS.put((byte)0xFF); 129 //newBS.put((byte)0xFE); 130 newBS.put(bs2); 131 bs2.position(0); 132 smBufDecode(d2, "UTF16-LE", bs2, us); 133 smBufEncode(e2, "UTF-16LE", us, newBS); 134 135 } 136 } 137 138 @Test TestUTF32Converter()139 public void TestUTF32Converter(){ 140 CharsetProvider icu = new CharsetProviderICU(); 141 Charset cs1 = icu.charsetForName("UTF-32BE"); 142 CharsetEncoder e1 = cs1.newEncoder(); 143 CharsetDecoder d1 = cs1.newDecoder(); 144 145 Charset cs2 = icu.charsetForName("UTF-32LE"); 146 CharsetEncoder e2 = cs2.newEncoder(); 147 CharsetDecoder d2 = cs2.newDecoder(); 148 149 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 150 CharBuffer us = CharBuffer.allocate(0xFF*2); 151 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 152 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 153 for(int j=0;j<0xFF; j++){ 154 int c = i+j; 155 156 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 157 continue; 158 } 159 160 if(c>0xFFFF){ 161 char lead = UTF16.getLeadSurrogate(c); 162 char trail = UTF16.getTrailSurrogate(c); 163 164 us.put(lead); 165 us.put(trail); 166 }else{ 167 us.put((char)c); 168 } 169 bs1.put((byte) (c >>> 24)); 170 bs1.put((byte) (c >>> 16)); 171 bs1.put((byte) (c >>> 8)); 172 bs1.put((byte) (c & 0xFF)); 173 174 bs2.put((byte) (c & 0xFF)); 175 bs2.put((byte) (c >>> 8)); 176 bs2.put((byte) (c >>> 16)); 177 bs2.put((byte) (c >>> 24)); 178 } 179 bs1.limit(bs1.position()); 180 bs1.position(0); 181 bs2.limit(bs2.position()); 182 bs2.position(0); 183 us.limit(us.position()); 184 us.position(0); 185 if(us.length()==0){ 186 continue; 187 } 188 189 190 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 191 192 newBS.put((byte)0x00); 193 newBS.put((byte)0x00); 194 newBS.put((byte)0xFE); 195 newBS.put((byte)0xFF); 196 197 newBS.put(bs1); 198 bs1.position(0); 199 smBufDecode(d1, "UTF-32", bs1, us); 200 smBufEncode(e1, "UTF-32", us, newBS); 201 202 203 newBS.clear(); 204 205 newBS.put((byte)0xFF); 206 newBS.put((byte)0xFE); 207 newBS.put((byte)0x00); 208 newBS.put((byte)0x00); 209 210 newBS.put(bs2); 211 bs2.position(0); 212 smBufDecode(d2, "UTF-32LE", bs2, us); 213 smBufEncode(e2, "UTF-32LE", us, newBS); 214 215 } 216 } 217 218 @Test TestASCIIConverter()219 public void TestASCIIConverter() { 220 runTestASCIIBasedConverter("ASCII", 0x80); 221 } 222 223 @Test Test88591Converter()224 public void Test88591Converter() { 225 runTestASCIIBasedConverter("iso-8859-1", 0x100); 226 } 227 runTestASCIIBasedConverter(String converter, int limit)228 public void runTestASCIIBasedConverter(String converter, int limit){ 229 CharsetProvider icu = new CharsetProviderICU(); 230 Charset icuChar = icu.charsetForName(converter); 231 CharsetEncoder encoder = icuChar.newEncoder(); 232 CharsetDecoder decoder = icuChar.newDecoder(); 233 CoderResult cr; 234 235 /* test with and without array-backed buffers */ 236 237 byte[] bytes = new byte[0x10000]; 238 char[] chars = new char[0x10000]; 239 for (int j = 0; j <= 0xffff; j++) { 240 bytes[j] = (byte) j; 241 chars[j] = (char) j; 242 } 243 244 boolean fail = false; 245 boolean arrays = false; 246 boolean decoding = false; 247 int i; 248 249 // 0 thru limit - 1 250 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 251 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 252 smBufDecode(decoder, converter, bs, us, true); 253 smBufDecode(decoder, converter, bs, us, false); 254 smBufEncode(encoder, converter, us, bs, true); 255 smBufEncode(encoder, converter, us, bs, false); 256 for (i = 0; i < limit; i++) { 257 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 258 us = CharBuffer.wrap(chars, i, 1).slice(); 259 try { 260 decoding = true; 261 arrays = true; 262 smBufDecode(decoder, converter, bs, us, true, false, true); 263 264 decoding = true; 265 arrays = false; 266 smBufDecode(decoder, converter, bs, us, true, false, false); 267 268 decoding = false; 269 arrays = true; 270 smBufEncode(encoder, converter, us, bs, true, false, true); 271 272 decoding = false; 273 arrays = false; 274 smBufEncode(encoder, converter, us, bs, true, false, false); 275 276 } catch (Exception ex) { 277 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 278 + Integer.toHexString(i) + (arrays ? " with arrays" : " without arrays") + " in " + converter); 279 return; 280 } 281 } 282 283 // decode limit thru 255 284 for (i = limit; i <= 0xff; i++) { 285 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 286 us = CharBuffer.wrap(chars, i, 1).slice(); 287 try { 288 smBufDecode(decoder, converter, bs, us, true, false, true); 289 fail = true; 290 arrays = true; 291 break; 292 } catch (Exception ex) { 293 } 294 try { 295 smBufDecode(decoder, converter, bs, us, true, false, false); 296 fail = true; 297 arrays = false; 298 break; 299 } catch (Exception ex) { 300 } 301 } 302 if (fail) { 303 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 304 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 305 return; 306 } 307 308 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 309 // time (it would take too much time to test every possible case) 310 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 311 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 312 us = CharBuffer.wrap(chars, i, 1).slice(); 313 try { 314 smBufEncode(encoder, converter, us, bs, true, false, true); 315 fail = true; 316 arrays = true; 317 break; 318 } catch (Exception ex) { 319 } 320 try { 321 smBufEncode(encoder, converter, us, bs, true, false, false); 322 fail = true; 323 arrays = false; 324 break; 325 } catch (Exception ex) { 326 } 327 } 328 if (fail) { 329 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 330 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 331 return; 332 } 333 334 // test overflow / underflow edge cases 335 outer: for (int n = 1; n <= 3; n++) { 336 for (int m = 0; m < n; m++) { 337 // expecting underflow 338 try { 339 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 340 us = CharBuffer.wrap(chars, 'a', m).slice(); 341 smBufDecode(decoder, converter, bs, us, true, false, true); 342 smBufDecode(decoder, converter, bs, us, true, false, false); 343 smBufEncode(encoder, converter, us, bs, true, false, true); 344 smBufEncode(encoder, converter, us, bs, true, false, false); 345 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 346 us = CharBuffer.wrap(chars, 'a', n).slice(); 347 smBufDecode(decoder, converter, bs, us, true, false, true, m); 348 smBufDecode(decoder, converter, bs, us, true, false, false, m); 349 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 350 us = CharBuffer.wrap(chars, 'a', m).slice(); 351 smBufEncode(encoder, converter, us, bs, true, false, true, m); 352 smBufEncode(encoder, converter, us, bs, true, false, false, m); 353 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 354 us = CharBuffer.wrap(chars, 'a', n).slice(); 355 smBufDecode(decoder, converter, bs, us, true, false, true); 356 smBufDecode(decoder, converter, bs, us, true, false, false); 357 smBufEncode(encoder, converter, us, bs, true, false, true); 358 smBufEncode(encoder, converter, us, bs, true, false, false); 359 } catch (Exception ex) { 360 fail = true; 361 break outer; 362 } 363 364 // expecting overflow 365 try { 366 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 367 us = CharBuffer.wrap(chars, 'a', m).slice(); 368 smBufDecode(decoder, converter, bs, us, true, false, true); 369 fail = true; 370 break; 371 } catch (Exception ex) { 372 if (!(ex instanceof BufferOverflowException)) { 373 fail = true; 374 break outer; 375 } 376 } 377 try { 378 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 379 us = CharBuffer.wrap(chars, 'a', m).slice(); 380 smBufDecode(decoder, converter, bs, us, true, false, false); 381 fail = true; 382 } catch (Exception ex) { 383 if (!(ex instanceof BufferOverflowException)) { 384 fail = true; 385 break outer; 386 } 387 } 388 try { 389 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 390 us = CharBuffer.wrap(chars, 'a', n).slice(); 391 smBufEncode(encoder, converter, us, bs, true, false, true); 392 fail = true; 393 } catch (Exception ex) { 394 if (!(ex instanceof BufferOverflowException)) { 395 fail = true; 396 break outer; 397 } 398 } 399 try { 400 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 401 us = CharBuffer.wrap(chars, 'a', n).slice(); 402 smBufEncode(encoder, converter, us, bs, true, false, false); 403 fail = true; 404 } catch (Exception ex) { 405 if (!(ex instanceof BufferOverflowException)) { 406 fail = true; 407 break outer; 408 } 409 } 410 } 411 } 412 if (fail) { 413 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 414 return; 415 } 416 417 // test surrogate combinations in encoding 418 String lead = "\ud888"; 419 String trail = "\udc88"; 420 String norm = "a"; 421 String ext = "\u0275"; // theta 422 String end = ""; 423 bs = ByteBuffer.wrap(new byte[] { 0 }); 424 String[] input = new String[] { // 425 lead + lead, // malf(1) 426 lead + trail, // unmap(2) 427 lead + norm, // malf(1) 428 lead + ext, // malf(1) 429 lead + end, // malf(1) 430 trail + norm, // malf(1) 431 trail + end, // malf(1) 432 ext + norm, // unmap(1) 433 ext + end, // unmap(1) 434 }; 435 CoderResult[] result = new CoderResult[] { 436 CoderResult.malformedForLength(1), 437 CoderResult.unmappableForLength(2), 438 CoderResult.malformedForLength(1), 439 CoderResult.malformedForLength(1), 440 CoderResult.malformedForLength(1), 441 CoderResult.malformedForLength(1), 442 CoderResult.malformedForLength(1), 443 CoderResult.unmappableForLength(1), 444 CoderResult.unmappableForLength(1), 445 }; 446 447 for (int index = 0; index < input.length; index++) { 448 CharBuffer source = CharBuffer.wrap(input[index]); 449 cr = encoder.encode(source, bs, true); 450 bs.rewind(); 451 encoder.reset(); 452 453 // if cr != results[x] 454 if (!((cr.isUnderflow() && result[index].isUnderflow()) 455 || (cr.isOverflow() && result[index].isOverflow()) 456 || (cr.isMalformed() && result[index].isMalformed()) 457 || (cr.isUnmappable() && result[index].isUnmappable())) 458 || (cr.isError() && cr.length() != result[index].length())) { 459 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 460 break; 461 } 462 463 source = CharBuffer.wrap(input[index].toCharArray()); 464 cr = encoder.encode(source, bs, true); 465 bs.rewind(); 466 encoder.reset(); 467 468 // if cr != results[x] 469 if (!((cr.isUnderflow() && result[index].isUnderflow()) 470 || (cr.isOverflow() && result[index].isOverflow()) 471 || (cr.isMalformed() && result[index].isMalformed()) 472 || (cr.isUnmappable() && result[index].isUnmappable())) 473 || (cr.isError() && cr.length() != result[index].length())) { 474 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 475 break; 476 } 477 } 478 } 479 480 @Test TestUTF8Converter()481 public void TestUTF8Converter() { 482 String converter = "UTF-8"; 483 CharsetProvider icu = new CharsetProviderICU(); 484 Charset icuChar = icu.charsetForName(converter); 485 CharsetEncoder encoder = icuChar.newEncoder(); 486 CharsetDecoder decoder = icuChar.newDecoder(); 487 ByteBuffer bs; 488 CharBuffer us; 489 CoderResult cr; 490 491 492 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 493 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 494 char[] chars = new char[size[0] + size[1] + size[2]]; 495 int i = 0; 496 int x, y; 497 498 // 0 to 1 << 7 (1 byters) 499 for (; i < size[0]; i++) { 500 bytes[i] = (byte) i; 501 chars[i] = (char) i; 502 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 503 us = CharBuffer.wrap(chars, i, 1).slice(); 504 try { 505 smBufDecode(decoder, converter, bs, us, true, false, true); 506 smBufDecode(decoder, converter, bs, us, true, false, false); 507 smBufEncode(encoder, converter, us, bs, true, false, true); 508 smBufEncode(encoder, converter, us, bs, true, false, false); 509 } catch (Exception ex) { 510 errln("Incorrect result in " + converter + " for 0x" 511 + Integer.toHexString(i)); 512 break; 513 } 514 } 515 516 // 1 << 7 to 1 << 11 (2 byters) 517 for (; i < size[1]; i++) { 518 x = size[0] + i*2; 519 y = size[0] + i; 520 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 521 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 522 chars[y] = (char) i; 523 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 524 us = CharBuffer.wrap(chars, y, 1).slice(); 525 try { 526 smBufDecode(decoder, converter, bs, us, true, false, true); 527 smBufDecode(decoder, converter, bs, us, true, false, false); 528 smBufEncode(encoder, converter, us, bs, true, false, true); 529 smBufEncode(encoder, converter, us, bs, true, false, false); 530 } catch (Exception ex) { 531 errln("Incorrect result in " + converter + " for 0x" 532 + Integer.toHexString(i)); 533 break; 534 } 535 } 536 537 // 1 << 11 to 1 << 16 (3 byters and surrogates) 538 for (; i < size[2]; i++) { 539 x = size[0] + size[1] * 2 + i * 3; 540 y = size[0] + size[1] + i; 541 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 542 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 543 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 544 chars[y] = (char) i; 545 if (!UTF16.isSurrogate((char)i)) { 546 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 547 us = CharBuffer.wrap(chars, y, 1).slice(); 548 try { 549 smBufDecode(decoder, converter, bs, us, true, false, true); 550 smBufDecode(decoder, converter, bs, us, true, false, false); 551 smBufEncode(encoder, converter, us, bs, true, false, true); 552 smBufEncode(encoder, converter, us, bs, true, false, false); 553 } catch (Exception ex) { 554 errln("Incorrect result in " + converter + " for 0x" 555 + Integer.toHexString(i)); 556 break; 557 } 558 } else { 559 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 560 us = CharBuffer.wrap(chars, y, 1).slice(); 561 562 decoder.reset(); 563 cr = decoder.decode(bs, us, true); 564 bs.rewind(); 565 us.rewind(); 566 if (!cr.isMalformed() || cr.length() != 1) { 567 errln("Incorrect result in " + converter + " decoder for 0x" 568 + Integer.toHexString(i) + " received " + cr); 569 break; 570 } 571 encoder.reset(); 572 cr = encoder.encode(us, bs, true); 573 bs.rewind(); 574 us.rewind(); 575 if (!cr.isMalformed() || cr.length() != 1) { 576 errln("Incorrect result in " + converter + " encoder for 0x" 577 + Integer.toHexString(i) + " received " + cr); 578 break; 579 } 580 581 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 582 us = CharBuffer.wrap(new String(chars, y, 1)); 583 584 decoder.reset(); 585 cr = decoder.decode(bs, us, true); 586 bs.rewind(); 587 us.rewind(); 588 if (!cr.isMalformed() || cr.length() != 1) { 589 errln("Incorrect result in " + converter + " decoder for 0x" 590 + Integer.toHexString(i) + " received " + cr); 591 break; 592 } 593 encoder.reset(); 594 cr = encoder.encode(us, bs, true); 595 bs.rewind(); 596 us.rewind(); 597 if (!cr.isMalformed() || cr.length() != 1) { 598 errln("Incorrect result in " + converter + " encoder for 0x" 599 + Integer.toHexString(i) + " received " + cr); 600 break; 601 } 602 603 604 } 605 } 606 if (true) 607 return; 608 } 609 610 @Test TestHZ()611 public void TestHZ() { 612 /* test input */ 613 char[] in = new char[] { 614 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 615 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 616 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 617 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 618 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 619 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 620 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 621 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 622 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 623 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 624 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 625 0x005A, 0x005B, 0x005C, 0x000A 626 }; 627 628 String converter = "HZ"; 629 CharsetProvider icu = new CharsetProviderICU(); 630 Charset icuChar = icu.charsetForName(converter); 631 CharsetEncoder encoder = icuChar.newEncoder(); 632 CharsetDecoder decoder = icuChar.newDecoder(); 633 try { 634 CharBuffer start = CharBuffer.wrap(in); 635 ByteBuffer bytes = encoder.encode(start); 636 CharBuffer finish = decoder.decode(bytes); 637 638 if (!equals(start, finish)) { 639 errln(converter + " roundtrip test failed: start does not match finish"); 640 641 char[] finishArray = new char[finish.limit()]; 642 for (int i=0; i<finishArray.length; i++) 643 finishArray[i] = finish.get(i); 644 645 logln("start: " + hex(in)); 646 logln("finish: " + hex(finishArray)); 647 } 648 } catch (CharacterCodingException ex) { 649 errln(converter + " roundtrip test failed: " + ex.getMessage()); 650 ex.printStackTrace(System.err); 651 } 652 653 /* For better code coverage */ 654 CoderResult result = CoderResult.UNDERFLOW; 655 byte byteout[] = { 656 (byte)0x7e, (byte)0x7d, (byte)0x41, 657 (byte)0x7e, (byte)0x7b, (byte)0x21, 658 }; 659 char charin[] = { 660 (char)0x0041, (char)0x0042, (char)0x3000 661 }; 662 ByteBuffer bb = ByteBuffer.wrap(byteout); 663 CharBuffer cb = CharBuffer.wrap(charin); 664 int testLoopSize = 5; 665 int bbLimits[] = { 0, 1, 3, 4, 6}; 666 int bbPositions[] = { 0, 0, 0, 3, 3 }; 667 int ccPositions[] = { 0, 0, 0, 2, 2 }; 668 for (int i = 0; i < testLoopSize; i++) { 669 encoder.reset(); 670 bb.limit(bbLimits[i]); 671 bb.position(bbPositions[i]); 672 cb.position(ccPositions[i]); 673 result = encoder.encode(cb, bb, true); 674 675 if (i < 3) { 676 if (!result.isOverflow()) { 677 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 678 } 679 } else { 680 if (result.isError()) { 681 errln("Error should not have occurred while encoding HZ.(" + i + ")"); 682 } 683 } 684 } 685 } 686 687 @Test TestUTF8Surrogates()688 public void TestUTF8Surrogates() { 689 byte[][] in = new byte[][] { 690 { (byte)0x61, }, 691 { (byte)0xc2, (byte)0x80, }, 692 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 693 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 694 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 695 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 696 }; 697 698 /* expected test results */ 699 char[][] results = new char[][] { 700 /* number of bytes read, code point */ 701 { '\u0061', }, 702 { '\u0080', }, 703 { '\u0800', }, 704 { '\ud800', '\udc00', }, // 10000 705 { '\udbd0', '\udf21', }, // 104321 706 { '\ud801', '\udc01', }, // 10401 707 }; 708 709 /* error test input */ 710 byte[][] in2 = new byte[][] { 711 { (byte)0x61, }, 712 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 713 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 714 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 715 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 716 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 717 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 718 (byte)0xfe, /* illegal byte altogether */ 719 (byte)0x62, }, 720 }; 721 722 /* expected error test results */ 723 char[][] results2 = new char[][] { 724 /* number of bytes read, code point */ 725 { '\u0062', }, 726 { '\u0062', }, 727 }; 728 729 String converter = "UTF-8"; 730 CharsetProvider icu = new CharsetProviderICU(); 731 Charset icuChar = icu.charsetForName(converter); 732 CharsetDecoder decoder = icuChar.newDecoder(); 733 734 int i; 735 try { 736 for (i = 0; i < in.length; i++) { 737 ByteBuffer source = ByteBuffer.wrap(in[i]); 738 CharBuffer expected = CharBuffer.wrap(results[i]); 739 smBufDecode(decoder, converter, source, expected, true, false, 740 true); 741 smBufDecode(decoder, converter, source, expected, true, false, 742 false); 743 } 744 } catch (Exception ex) { 745 errln("Incorrect result in " + converter); 746 } 747 try { 748 for (i = 0; i < in2.length; i++) { 749 ByteBuffer source = ByteBuffer.wrap(in2[i]); 750 CharBuffer expected = CharBuffer.wrap(results2[i]); 751 decoder.onMalformedInput(CodingErrorAction.IGNORE); 752 smBufDecode(decoder, converter, source, expected, true, false, 753 true); 754 smBufDecode(decoder, converter, source, expected, true, false, 755 false); 756 } 757 } catch (Exception ex) { 758 errln("Incorrect result in " + converter); 759 } 760 } 761 762 @Test TestSurrogateBehavior()763 public void TestSurrogateBehavior() { 764 CharsetProviderICU icu = new CharsetProviderICU(); 765 766 // get all the converters into an array 767 Object[] converters = CharsetProviderICU.getAvailableNames(); 768 769 String norm = "a"; 770 String ext = "\u0275"; // theta 771 String lead = "\ud835"; 772 String trail = "\udd04"; 773 // lead + trail = \U1d504 (fraktur capital A) 774 775 String input = 776 // error position 777 ext // unmap(1) 1 778 + lead // under 1 779 + lead // malf(1) 2 780 + trail // unmap(2) 4 781 + trail // malf(1) 5 782 + ext // unmap(1) 6 783 + norm // unmap(1) 7 784 ; 785 CoderResult[] results = new CoderResult[] { 786 CoderResult.unmappableForLength(1), // or underflow 787 CoderResult.UNDERFLOW, 788 CoderResult.malformedForLength(1), 789 CoderResult.unmappableForLength(2), // or underflow 790 CoderResult.malformedForLength(1), 791 CoderResult.unmappableForLength(1), // or underflow 792 CoderResult.unmappableForLength(1), // or underflow 793 }; 794 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 795 int n = positions.length; 796 797 int badcount = 0; 798 int goodcount = 0; 799 int[] uhohindices = new int[n]; 800 int[] badposindices = new int[n]; 801 int[] malfindices = new int[n]; 802 int[] unmapindices = new int[n]; 803 ArrayList pass = new ArrayList(); 804 ArrayList exempt = new ArrayList(); 805 806 outer: for (int conv=0; conv<converters.length; conv++) { 807 String converter = (String)converters[conv]; 808 if (converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 809 converter.equals("x-LMBCS-1")) { 810 exempt.add(converter); 811 continue; 812 } 813 814 boolean currentlybad = false; 815 Charset icuChar = icu.charsetForName(converter); 816 CharsetEncoder encoder = icuChar.newEncoder(); 817 CoderResult cr; 818 819 CharBuffer source = CharBuffer.wrap(input); 820 ByteBuffer target = ByteBuffer.allocate(30); 821 ByteBuffer expected = null; 822 try { 823 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 824 encoder.onMalformedInput(CodingErrorAction.IGNORE); 825 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 826 encoder.reset(); 827 } catch (CharacterCodingException ex) { 828 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 829 return; 830 } catch (RuntimeException ex) { 831 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 832 errln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 833 continue outer; 834 } 835 836 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 837 encoder.onMalformedInput(CodingErrorAction.REPORT); 838 for (int i=0; i<n; i++) { 839 source.limit(i+1); 840 cr = encoder.encode(source, target, i == n - 1); 841 if (!(equals(cr, results[i]) 842 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 843 )) { 844 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 845 if (results[i].isMalformed() && cr.isMalformed()) { 846 malfindices[i]++; 847 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 848 unmapindices[i]++; 849 } else { 850 uhohindices[i]++; 851 } 852 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 853 } 854 if (source.position() != positions[i]) { 855 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 856 badposindices[i]++; 857 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 858 } 859 860 } 861 encoder.reset(); 862 863 //System.out.println("\n" + hex(target.array())); 864 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 865 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 866 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 867 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 868 } 869 870 if (!currentlybad) { 871 goodcount++; 872 pass.add(converter); 873 } 874 } 875 876 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 877 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 878 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 879 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 880 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); logln(""); 881 log("uhoh \t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 882 logln(""); 883 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 884 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 885 } 886 887 // public void TestCharsetCallback() { 888 // String currentTest = "initialization"; 889 // try { 890 // Class[] params; 891 // 892 // // get the classes 893 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 894 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 895 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 896 // 897 // // set up encoderCall 898 // params = new Class[] {CharsetEncoderICU.class, Object.class, 899 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 900 // char[].class, int.class, int.class, CoderResult.class }; 901 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 902 // 903 // // set up decoderCall 904 // params = new Class[] {CharsetDecoderICU.class, Object.class, 905 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 906 // char[].class, int.class, CoderResult.class}; 907 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 908 // 909 // // get relevant fields 910 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 911 // 912 // // set up a few arguments 913 // CharsetProvider provider = new CharsetProviderICU(); 914 // Charset charset = provider.charsetForName("UTF-8"); 915 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 916 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 917 // CharBuffer chars = CharBuffer.allocate(10); 918 // chars.put('o'); 919 // chars.put('k'); 920 // ByteBuffer bytes = ByteBuffer.allocate(10); 921 // bytes.put((byte)'o'); 922 // bytes.put((byte)'k'); 923 // IntBuffer offsets = IntBuffer.allocate(10); 924 // offsets.put(0); 925 // offsets.put(1); 926 // char[] buffer = null; 927 // Integer length = new Integer(2); 928 // Integer cp = new Integer(0); 929 // CoderResult unmap = CoderResult.unmappableForLength(2); 930 // CoderResult malf = CoderResult.malformedForLength(2); 931 // CoderResult under = CoderResult.UNDERFLOW; 932 // 933 // // set up error arrays 934 // Integer invalidCharLength = new Integer(1); 935 // Byte subChar1 = new Byte((byte)0); 936 // Byte subChar1_alternate = new Byte((byte)1); // for TO_U_CALLBACK_SUBSTITUTE 937 // 938 // // set up chars and bytes backups and expected values for certain cases 939 // CharBuffer charsBackup = bufferCopy(chars); 940 // ByteBuffer bytesBackup = bufferCopy(bytes); 941 // IntBuffer offsetsBackup = bufferCopy(offsets); 942 // CharBuffer encoderCharsExpected = bufferCopy(chars); 943 // ByteBuffer encoderBytesExpected = bufferCopy(bytes); 944 // IntBuffer encoderOffsetsExpected = bufferCopy(offsets); 945 // CharBuffer decoderCharsExpected1 = bufferCopy(chars); 946 // CharBuffer decoderCharsExpected2 = bufferCopy(chars); 947 // IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets); 948 // IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets); 949 // 950 // // initialize fields to obtain expected data 951 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 952 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1); 953 // 954 // // run cbFromUWriteSub 955 // Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class}); 956 // cbFromUWriteSub.setAccessible(true); 957 // CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected}); 958 // 959 // // run toUWriteUChars with normal data 960 // Method toUWriteUChars = CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class}); 961 // toUWriteUChars.setAccessible(true); 962 // CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, new Integer(0), new Integer(1), decoderCharsExpected1, decoderOffsetsExpected1, new Integer(bytes.position())}); 963 // 964 // // reset certain fields 965 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 966 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate); 967 // 968 // // run toUWriteUChars again 969 // CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, new Integer(0), new Integer(1), decoderCharsExpected2, decoderOffsetsExpected2, new Integer(bytes.position())}); 970 // 971 // // begin creating the tests array 972 // ArrayList tests = new ArrayList(); 973 // 974 // // create tests for FROM_U_CALLBACK_SKIP 0 975 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 976 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 977 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 978 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 979 // 980 // // create tests for TO_U_CALLBACK_SKIP 4 981 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 982 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 983 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 984 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 985 // 986 // // create tests for FROM_U_CALLBACK_STOP 8 987 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 988 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 989 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 990 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 991 // 992 // // create tests for TO_U_CALLBACK_STOP 12 993 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 994 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 995 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 996 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 997 // 998 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 999 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1000 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1001 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1002 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1003 // 1004 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1005 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1006 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1007 // 1008 // Iterator iter = tests.iterator(); 1009 // for (int i=0; iter.hasNext(); i++) { 1010 // // get the data out of the map 1011 // Object[] next = (Object[])iter.next(); 1012 // 1013 // Method method = (Method)next[0]; 1014 // String fieldName = (String)next[1]; 1015 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1016 // Object[] args = (Object[])next[2]; 1017 // CoderResult expected = (CoderResult)next[3]; 1018 // CharBuffer charsExpected = (CharBuffer)next[4]; 1019 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1020 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1021 // 1022 // // set up error arrays and certain fields 1023 // Object[] values = (Object[])next[7]; 1024 // if (method == decoderCall) { 1025 // decoder.reset(); 1026 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1027 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1028 // } else if (method == encoderCall) { 1029 // encoder.reset(); 1030 // } 1031 // 1032 // try { 1033 // // invoke the method 1034 // CoderResult actual = (CoderResult)method.invoke(field, args); 1035 // 1036 // // if expected != actual 1037 // if (!coderResultsEqual(expected, actual)) { 1038 // // case #i refers to the index in the arraylist tests 1039 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1040 // } 1041 // // if the expected buffers != actual buffers 1042 // else if (!(buffersEqual(chars, charsExpected) && 1043 // buffersEqual(bytes, bytesExpected) && 1044 // buffersEqual(offsets, offsetsExpected))) { 1045 // // case #i refers to the index in the arraylist tests 1046 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1047 // } 1048 // } catch (InvocationTargetException ex) { 1049 // // case #i refers to the index in the arraylist tests 1050 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1051 // //ex.getCause().printStackTrace(); 1052 // } 1053 // 1054 // // reset the buffers 1055 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1056 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1057 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1058 // bytes.position(bytesBackup.position()); 1059 // chars.position(charsBackup.position()); 1060 // offsets.position(offsetsBackup.position()); 1061 // } 1062 // 1063 // } catch (Exception ex) { 1064 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1065 // ex.printStackTrace(); 1066 // } 1067 // } 1068 // 1069 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1070 // Field field = c.getDeclaredField(name); 1071 // field.setAccessible(true); 1072 // return field.get(instance); 1073 // } 1074 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1075 // Field field = c.getDeclaredField(name); 1076 // field.setAccessible(true); 1077 // if (value instanceof Boolean) 1078 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1079 // else if (value instanceof Byte) 1080 // field.setByte(instance, ((Byte)value).byteValue()); 1081 // else if (value instanceof Character) 1082 // field.setChar(instance, ((Character)value).charValue()); 1083 // else if (value instanceof Double) 1084 // field.setDouble(instance, ((Double)value).doubleValue()); 1085 // else if (value instanceof Float) 1086 // field.setFloat(instance, ((Float)value).floatValue()); 1087 // else if (value instanceof Integer) 1088 // field.setInt(instance, ((Integer)value).intValue()); 1089 // else if (value instanceof Long) 1090 // field.setLong(instance, ((Long)value).longValue()); 1091 // else if (value instanceof Short) 1092 // field.setShort(instance, ((Short)value).shortValue()); 1093 // else 1094 // field.set(instance, value); 1095 // } 1096 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1097 // if (a == null && b == null) 1098 // return true; 1099 // if (a == null || b == null) 1100 // return false; 1101 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1102 // return true; 1103 // if (a.length() != b.length()) 1104 // return false; 1105 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1106 // return true; 1107 // return false; 1108 // } 1109 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1110 // if (a.position() != b.position()) 1111 // return false; 1112 // int limit = a.position(); 1113 // for (int i=0; i<limit; i++) 1114 // if (a.get(i) != b.get(i)) 1115 // return false; 1116 // return true; 1117 // } 1118 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1119 // if (a.position() != b.position()) 1120 // return false; 1121 // int limit = a.position(); 1122 // for (int i=0; i<limit; i++) 1123 // if (a.get(i) != b.get(i)) 1124 // return false; 1125 // return true; 1126 // } 1127 // private boolean buffersEqual(IntBuffer a, IntBuffer b) { 1128 // if (a.position() != b.position()) 1129 // return false; 1130 // int limit = a.position(); 1131 // for (int i=0; i<limit; i++) 1132 // if (a.get(i) != b.get(i)) 1133 // return false; 1134 // return true; 1135 // } 1136 // private ByteBuffer bufferCopy(ByteBuffer src) { 1137 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1138 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1139 // dest.position(src.position()); 1140 // return dest; 1141 // } 1142 // private CharBuffer bufferCopy(CharBuffer src) { 1143 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1144 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1145 // dest.position(src.position()); 1146 // return dest; 1147 // } 1148 // private IntBuffer bufferCopy(IntBuffer src) { 1149 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1150 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1151 // dest.position(src.position()); 1152 // return dest; 1153 // } 1154 1155 1156 @Test TestAPISemantics( )1157 public void TestAPISemantics(/*String encoding*/) { 1158 String encoding = "UTF-16"; 1159 CharsetDecoder decoder = null; 1160 CharsetEncoder encoder = null; 1161 try { 1162 CharsetProviderICU provider = new CharsetProviderICU(); 1163 Charset charset = provider.charsetForName(encoding); 1164 decoder = charset.newDecoder(); 1165 encoder = charset.newEncoder(); 1166 } catch(MissingResourceException ex) { 1167 warnln("Could not load charset data: " + encoding); 1168 return; 1169 } 1170 1171 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1172 final byte[] byteStr = { 1173 (byte) 0x00,(byte) 'a', 1174 (byte) 0x00,(byte) 'b', 1175 (byte) 0x00,(byte) 'c', 1176 (byte) 0x00,(byte) 'd', 1177 (byte) 0xd8,(byte) 0x00, 1178 (byte) 0xdc,(byte) 0x00, 1179 (byte) 0x12,(byte) 0x34, 1180 (byte) 0x00,(byte) 0xa5, 1181 (byte) 0x30,(byte) 0x00, 1182 (byte) 0x00,(byte) 0x0d, 1183 (byte) 0x00,(byte) 0x0a 1184 }; 1185 final byte[] expectedByteStr = { 1186 (byte) 0xfe,(byte) 0xff, 1187 (byte) 0x00,(byte) 'a', 1188 (byte) 0x00,(byte) 'b', 1189 (byte) 0x00,(byte) 'c', 1190 (byte) 0x00,(byte) 'd', 1191 (byte) 0xd8,(byte) 0x00, 1192 (byte) 0xdc,(byte) 0x00, 1193 (byte) 0x12,(byte) 0x34, 1194 (byte) 0x00,(byte) 0xa5, 1195 (byte) 0x30,(byte) 0x00, 1196 (byte) 0x00,(byte) 0x0d, 1197 (byte) 0x00,(byte) 0x0a 1198 }; 1199 1200 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1201 CharBuffer uniVal = CharBuffer.wrap(unistr); 1202 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1203 1204 int rc = 0; 1205 if(decoder==null){ 1206 warnln("Could not load decoder."); 1207 return; 1208 } 1209 decoder.reset(); 1210 /* Convert the whole buffer to Unicode */ 1211 try { 1212 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1213 CoderResult result = decoder.decode(byes, chars, false); 1214 1215 if (result.isError()) { 1216 errln("ToChars encountered Error"); 1217 rc = 1; 1218 } 1219 if (result.isOverflow()) { 1220 errln("ToChars encountered overflow exception"); 1221 rc = 1; 1222 } 1223 if (!equals(chars, unistr)) { 1224 errln("ToChars does not match"); 1225 printchars(chars); 1226 errln("Expected : "); 1227 printchars(unistr); 1228 rc = 2; 1229 } 1230 1231 } catch (Exception e) { 1232 errln("ToChars - exception in buffer"); 1233 rc = 5; 1234 } 1235 1236 /* Convert single bytes to Unicode */ 1237 try { 1238 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1239 ByteBuffer b = ByteBuffer.wrap(byteStr); 1240 decoder.reset(); 1241 CoderResult result=null; 1242 for (int i = 1; i <= byteStr.length; i++) { 1243 b.limit(i); 1244 result = decoder.decode(b, chars, false); 1245 if(result.isOverflow()){ 1246 errln("ToChars single threw an overflow exception"); 1247 } 1248 if (result.isError()) { 1249 errln("ToChars single the result is an error "+result.toString()); 1250 } 1251 } 1252 if (unistr.length() != (chars.limit())) { 1253 errln("ToChars single len does not match"); 1254 rc = 3; 1255 } 1256 if (!equals(chars, unistr)) { 1257 errln("ToChars single does not match"); 1258 printchars(chars); 1259 rc = 4; 1260 } 1261 } catch (Exception e) { 1262 errln("ToChars - exception in single"); 1263 //e.printStackTrace(); 1264 rc = 6; 1265 } 1266 1267 /* Convert the buffer one at a time to Unicode */ 1268 try { 1269 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1270 decoder.reset(); 1271 byes.rewind(); 1272 for (int i = 1; i <= byteStr.length; i++) { 1273 byes.limit(i); 1274 CoderResult result = decoder.decode(byes, chars, false); 1275 if (result.isError()) { 1276 errln("Error while decoding: "+result.toString()); 1277 } 1278 if(result.isOverflow()){ 1279 errln("ToChars Simple threw an overflow exception"); 1280 } 1281 } 1282 if (chars.limit() != unistr.length()) { 1283 errln("ToChars Simple buffer len does not match"); 1284 rc = 7; 1285 } 1286 if (!equals(chars, unistr)) { 1287 errln("ToChars Simple buffer does not match"); 1288 printchars(chars); 1289 err(" Expected : "); 1290 printchars(unistr); 1291 rc = 8; 1292 } 1293 } catch (Exception e) { 1294 errln("ToChars - exception in single buffer"); 1295 //e.printStackTrace(System.err); 1296 rc = 9; 1297 } 1298 if (rc != 0) { 1299 errln("Test Simple ToChars for encoding : FAILED"); 1300 } 1301 1302 rc = 0; 1303 /* Convert the whole buffer from unicode */ 1304 try { 1305 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1306 encoder.reset(); 1307 CoderResult result = encoder.encode(uniVal, bytes, false); 1308 if (result.isError()) { 1309 errln("FromChars reported error: " + result.toString()); 1310 rc = 1; 1311 } 1312 if(result.isOverflow()){ 1313 errln("FromChars threw an overflow exception"); 1314 } 1315 bytes.position(0); 1316 if (!bytes.equals(expected)) { 1317 errln("FromChars does not match"); 1318 printbytes(bytes); 1319 printbytes(expected); 1320 rc = 2; 1321 } 1322 } catch (Exception e) { 1323 errln("FromChars - exception in buffer"); 1324 //e.printStackTrace(System.err); 1325 rc = 5; 1326 } 1327 1328 /* Convert the buffer one char at a time to unicode */ 1329 try { 1330 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1331 CharBuffer c = CharBuffer.wrap(unistr); 1332 encoder.reset(); 1333 CoderResult result= null; 1334 for (int i = 1; i <= unistr.length(); i++) { 1335 c.limit(i); 1336 result = encoder.encode(c, bytes, false); 1337 if(result.isOverflow()){ 1338 errln("FromChars single threw an overflow exception"); 1339 } 1340 if(result.isError()){ 1341 errln("FromChars single threw an error: "+ result.toString()); 1342 } 1343 } 1344 if (expectedByteStr.length != bytes.limit()) { 1345 errln("FromChars single len does not match"); 1346 rc = 3; 1347 } 1348 1349 bytes.position(0); 1350 if (!bytes.equals(expected)) { 1351 errln("FromChars single does not match"); 1352 printbytes(bytes); 1353 printbytes(expected); 1354 rc = 4; 1355 } 1356 1357 } catch (Exception e) { 1358 errln("FromChars - exception in single"); 1359 //e.printStackTrace(System.err); 1360 rc = 6; 1361 } 1362 1363 /* Convert one char at a time to unicode */ 1364 try { 1365 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1366 encoder.reset(); 1367 char[] temp = unistr.toCharArray(); 1368 CoderResult result=null; 1369 for (int i = 0; i <= temp.length; i++) { 1370 uniVal.limit(i); 1371 result = encoder.encode(uniVal, bytes, false); 1372 if(result.isOverflow()){ 1373 errln("FromChars simple threw an overflow exception"); 1374 } 1375 if(result.isError()){ 1376 errln("FromChars simple threw an error: "+ result.toString()); 1377 } 1378 } 1379 if (bytes.limit() != expectedByteStr.length) { 1380 errln("FromChars Simple len does not match"); 1381 rc = 7; 1382 } 1383 if (!bytes.equals(byes)) { 1384 errln("FromChars Simple does not match"); 1385 printbytes(bytes); 1386 printbytes(byes); 1387 rc = 8; 1388 } 1389 } catch (Exception e) { 1390 errln("FromChars - exception in single buffer"); 1391 //e.printStackTrace(System.err); 1392 rc = 9; 1393 } 1394 if (rc != 0) { 1395 errln("Test Simple FromChars " + encoding + " --FAILED"); 1396 } 1397 } 1398 printchars(CharBuffer buf)1399 void printchars(CharBuffer buf) { 1400 int i; 1401 char[] chars = new char[buf.limit()]; 1402 //save the current position 1403 int pos = buf.position(); 1404 buf.position(0); 1405 buf.get(chars); 1406 //reset to old position 1407 buf.position(pos); 1408 for (i = 0; i < chars.length; i++) { 1409 err(hex(chars[i]) + " "); 1410 } 1411 errln(""); 1412 } printchars(String str)1413 void printchars(String str) { 1414 char[] chars = str.toCharArray(); 1415 for (int i = 0; i < chars.length; i++) { 1416 err(hex(chars[i]) + " "); 1417 } 1418 errln(""); 1419 } printbytes(ByteBuffer buf)1420 void printbytes(ByteBuffer buf) { 1421 int i; 1422 byte[] bytes = new byte[buf.limit()]; 1423 //save the current position 1424 int pos = buf.position(); 1425 buf.position(0); 1426 buf.get(bytes); 1427 //reset to old position 1428 buf.position(pos); 1429 for (i = 0; i < bytes.length; i++) { 1430 System.out.print(hex(bytes[i]) + " "); 1431 } 1432 errln(""); 1433 } 1434 equals(CoderResult a, CoderResult b)1435 public boolean equals(CoderResult a, CoderResult b) { 1436 return (a.isUnderflow() && b.isUnderflow()) 1437 || (a.isOverflow() && b.isOverflow()) 1438 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1439 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1440 } equals(CharBuffer buf, String str)1441 public boolean equals(CharBuffer buf, String str) { 1442 return equals(buf, str.toCharArray()); 1443 } equals(CharBuffer buf, CharBuffer str)1444 public boolean equals(CharBuffer buf, CharBuffer str) { 1445 if (buf.limit() != str.limit()) 1446 return false; 1447 int limit = buf.limit(); 1448 for (int i = 0; i < limit; i++) 1449 if (buf.get(i) != str.get(i)) 1450 return false; 1451 return true; 1452 } equals(CharBuffer buf, CharBuffer str, int limit)1453 public boolean equals(CharBuffer buf, CharBuffer str, int limit) { 1454 if (limit > buf.limit() || limit > str.limit()) 1455 return false; 1456 for (int i = 0; i < limit; i++) 1457 if (buf.get(i) != str.get(i)) 1458 return false; 1459 return true; 1460 } equals(CharBuffer buf, char[] compareTo)1461 public boolean equals(CharBuffer buf, char[] compareTo) { 1462 char[] chars = new char[buf.limit()]; 1463 //save the current position 1464 int pos = buf.position(); 1465 buf.position(0); 1466 buf.get(chars); 1467 //reset to old position 1468 buf.position(pos); 1469 return equals(chars, compareTo); 1470 } 1471 equals(char[] chars, char[] compareTo)1472 public boolean equals(char[] chars, char[] compareTo) { 1473 if (chars.length != compareTo.length) { 1474 errln( 1475 "Length does not match chars: " 1476 + chars.length 1477 + " compareTo: " 1478 + compareTo.length); 1479 return false; 1480 } else { 1481 boolean result = true; 1482 for (int i = 0; i < chars.length; i++) { 1483 if (chars[i] != compareTo[i]) { 1484 logln( 1485 "Got: " 1486 + hex(chars[i]) 1487 + " Expected: " 1488 + hex(compareTo[i]) 1489 + " At: " 1490 + i); 1491 result = false; 1492 } 1493 } 1494 return result; 1495 } 1496 } 1497 equals(ByteBuffer buf, byte[] compareTo)1498 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1499 byte[] chars = new byte[buf.limit()]; 1500 //save the current position 1501 int pos = buf.position(); 1502 buf.position(0); 1503 buf.get(chars); 1504 //reset to old position 1505 buf.position(pos); 1506 return equals(chars, compareTo); 1507 } equals(ByteBuffer buf, ByteBuffer compareTo)1508 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1509 if (buf.limit() != compareTo.limit()) 1510 return false; 1511 int limit = buf.limit(); 1512 for (int i = 0; i < limit; i++) 1513 if (buf.get(i) != compareTo.get(i)) 1514 return false; 1515 return true; 1516 } equals(ByteBuffer buf, ByteBuffer compareTo, int limit)1517 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) { 1518 if (limit > buf.limit() || limit > compareTo.limit()) 1519 return false; 1520 for (int i = 0; i < limit; i++) 1521 if (buf.get(i) != compareTo.get(i)) 1522 return false; 1523 return true; 1524 } equals(byte[] chars, byte[] compareTo)1525 public boolean equals(byte[] chars, byte[] compareTo) { 1526 if (false/*chars.length != compareTo.length*/) { 1527 errln( 1528 "Length does not match chars: " 1529 + chars.length 1530 + " compareTo: " 1531 + compareTo.length); 1532 return false; 1533 } else { 1534 boolean result = true; 1535 for (int i = 0; i < chars.length; i++) { 1536 if (chars[i] != compareTo[i]) { 1537 logln( 1538 "Got: " 1539 + hex(chars[i]) 1540 + " Expected: " 1541 + hex(compareTo[i]) 1542 + " At: " 1543 + i); 1544 result = false; 1545 } 1546 } 1547 return result; 1548 } 1549 } 1550 1551 // TODO 1552 /* 1553 @Test 1554 public void TestCallback(String encoding) throws Exception { 1555 1556 byte[] gbSource = 1557 { 1558 (byte) 0x81, 1559 (byte) 0x36, 1560 (byte) 0xDE, 1561 (byte) 0x36, 1562 (byte) 0x81, 1563 (byte) 0x36, 1564 (byte) 0xDE, 1565 (byte) 0x37, 1566 (byte) 0x81, 1567 (byte) 0x36, 1568 (byte) 0xDE, 1569 (byte) 0x38, 1570 (byte) 0xe3, 1571 (byte) 0x32, 1572 (byte) 0x9a, 1573 (byte) 0x36 }; 1574 1575 char[] subChars = { 'P', 'I' }; 1576 1577 decoder.reset(); 1578 1579 decoder.replaceWith(new String(subChars)); 1580 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1581 CharBuffer myTarget = CharBuffer.allocate(5); 1582 1583 decoder.decode(mySource, myTarget, true); 1584 char[] expectedResult = 1585 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1586 1587 if (!equals(myTarget, new String(expectedResult))) { 1588 errln("Test callback GB18030 to Unicode : FAILED"); 1589 } 1590 1591 } 1592 */ 1593 1594 @Test TestCanConvert( )1595 public void TestCanConvert(/*String encoding*/)throws Exception { 1596 char[] mySource = { 1597 '\ud800', '\udc00',/*surrogate pair */ 1598 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1599 '\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1600 '\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1601 '\ud800','\udc00',/*surrogate pair */ 1602 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1603 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1604 }; 1605 String encoding = "UTF-16"; 1606 CharsetEncoder encoder = null; 1607 try { 1608 CharsetProviderICU provider = new CharsetProviderICU(); 1609 Charset charset = provider.charsetForName(encoding); 1610 encoder = charset.newEncoder(); 1611 } catch(MissingResourceException ex) { 1612 warnln("Could not load charset data: " + encoding); 1613 return; 1614 } 1615 if (!encoder.canEncode(new String(mySource))) { 1616 errln("Test canConvert() " + encoding + " failed. "+encoder); 1617 } 1618 1619 } 1620 1621 @Test TestAvailableCharsets()1622 public void TestAvailableCharsets() { 1623 SortedMap map = Charset.availableCharsets(); 1624 Set keySet = map.keySet(); 1625 Iterator iter = keySet.iterator(); 1626 while(iter.hasNext()){ 1627 logln("Charset name: "+iter.next().toString()); 1628 } 1629 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1630 int mapSize = map.size(); 1631 if(mapSize < charsets.length){ 1632 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1633 + " JDK: " + mapSize); 1634 } 1635 logln("Total Number of chasets = " + map.size()); 1636 } 1637 1638 @Test TestWindows936()1639 public void TestWindows936(){ 1640 CharsetProviderICU icu = new CharsetProviderICU(); 1641 Charset cs = icu.charsetForName("windows-936-2000"); 1642 String canonicalName = cs.name(); 1643 if(!canonicalName.equals("GBK")){ 1644 errln("Did not get the expected canonical name. Got: "+canonicalName); //get the canonical name 1645 } 1646 } 1647 1648 @Test TestICUAvailableCharsets()1649 public void TestICUAvailableCharsets() { 1650 CharsetProviderICU icu = new CharsetProviderICU(); 1651 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1652 for(int i=0;i<charsets.length;i++){ 1653 Charset cs = icu.charsetForName((String)charsets[i]); 1654 try{ 1655 CharsetEncoder encoder = cs.newEncoder(); 1656 if(encoder!=null){ 1657 logln("Creation of encoder succeeded. "+cs.toString()); 1658 } 1659 }catch(Exception ex){ 1660 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1661 } 1662 try{ 1663 CharsetDecoder decoder = cs.newDecoder(); 1664 if(decoder!=null){ 1665 logln("Creation of decoder succeeded. "+cs.toString()); 1666 } 1667 }catch(Exception ex){ 1668 errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString()); 1669 } 1670 } 1671 } 1672 1673 /* jitterbug 4312 */ 1674 @Test TestUnsupportedCharset()1675 public void TestUnsupportedCharset(){ 1676 CharsetProvider icu = new CharsetProviderICU(); 1677 Charset icuChar = icu.charsetForName("impossible"); 1678 if(icuChar != null){ 1679 errln("ICU does not conform to the spec"); 1680 } 1681 } 1682 1683 @Test TestEncoderCreation()1684 public void TestEncoderCreation(){ 1685 // Use CharsetICU.forNameICU() so that we get the ICU version 1686 // even if the system or another provider also supports this charset. 1687 String encoding = "GB_2312-80"; 1688 try{ 1689 Charset cs = CharsetICU.forNameICU(encoding); 1690 CharsetEncoder enc = cs.newEncoder(); 1691 if(enc!=null){ 1692 logln("Successfully created an encoder for " + encoding + ": " + enc); 1693 if(!(enc instanceof CharsetEncoderICU)) { 1694 errln("Expected " + encoding + 1695 " to be implemented by ICU but got an instance of " + enc.getClass()); 1696 } 1697 }else{ 1698 errln("Error creating charset encoder for " + encoding); 1699 } 1700 }catch(Exception e){ 1701 warnln("Error creating charset encoder for " + encoding + ": " + e); 1702 } 1703 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1704 encoding = "x-ibm-971_P100-1995"; 1705 try{ 1706 Charset cs = Charset.forName(encoding); 1707 CharsetEncoder enc = cs.newEncoder(); 1708 if(enc!=null){ 1709 logln("Successfully created an encoder for " + encoding + ": " + enc + 1710 " which is implemented by ICU? " + (enc instanceof CharsetEncoderICU)); 1711 }else{ 1712 errln("Error creating charset encoder for " + encoding); 1713 } 1714 }catch(Exception e){ 1715 warnln("Error creating charset encoder for " + encoding + ": " + e); 1716 } 1717 } 1718 1719 @Test TestSubBytes()1720 public void TestSubBytes(){ 1721 try{ 1722 //create utf-8 decoder 1723 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1724 1725 //create a valid byte array, which can be decoded to " buffer" 1726 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1727 1728 ByteBuffer buffer = ByteBuffer.allocate(20); 1729 1730 //add a evil byte to make the byte buffer be malformed input 1731 buffer.put((byte)0xd8); 1732 1733 //put the valid byte array 1734 buffer.put(unibytes); 1735 1736 //reset postion 1737 buffer.flip(); 1738 1739 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1740 CharBuffer out = decoder.decode(buffer); 1741 String expected = "\ufffd buffer"; 1742 if(!expected.equals(new String(out.array()))){ 1743 errln("Did not get the expected result for substitution chars. Got: "+ 1744 new String(out.array()) + "("+ hex(out.array())+")"); 1745 } 1746 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1747 }catch (CharacterCodingException ex){ 1748 errln("Unexpected exception: "+ex.toString()); 1749 } 1750 } 1751 /* 1752 1753 @Test 1754 public void TestImplFlushFailure(){ 1755 1756 try{ 1757 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1758 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1759 ByteBuffer out = ByteBuffer.allocate(30); 1760 encoder.encode(in, out, true); 1761 encoder.flush(out); 1762 if(out.position()!= 20){ 1763 errln("Did not get the expected position from flush"); 1764 } 1765 1766 }catch (Exception ex){ 1767 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1768 } 1769 } 1770 */ 1771 1772 @Test TestISO88591()1773 public void TestISO88591() { 1774 1775 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1776 if(cs!=null){ 1777 CharsetEncoder encoder = cs.newEncoder(); 1778 if(encoder!=null){ 1779 encoder.canEncode("\uc2a3"); 1780 }else{ 1781 errln("Could not create encoder for iso-8859-1"); 1782 } 1783 }else{ 1784 errln("Could not create Charset for iso-8859-1"); 1785 } 1786 1787 } 1788 1789 @Test TestUTF8Encode()1790 public void TestUTF8Encode() { 1791 // Test with a lead surrogate in the middle of the input text. 1792 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 1793 CharBuffer in = CharBuffer.wrap("\ud800a"); 1794 ByteBuffer out = ByteBuffer.allocate(30); 1795 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1796 CoderResult result = encoderICU.encode(in, out, true); 1797 1798 if (result.isMalformed()) { 1799 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1800 } else if (result.isUnderflow()) { 1801 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1802 } 1803 1804 in.position(0); 1805 out.clear(); 1806 1807 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1808 result = encoderJDK.encode(in, out, true); 1809 if (result.isMalformed()) { 1810 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1811 } else if (result.isUnderflow()) { 1812 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1813 } 1814 } 1815 1816 /* private void printCB(CharBuffer buf){ 1817 buf.rewind(); 1818 while(buf.hasRemaining()){ 1819 System.out.println(hex(buf.get())); 1820 } 1821 buf.rewind(); 1822 } 1823 */ 1824 1825 @Test TestUTF8()1826 public void TestUTF8() throws CharacterCodingException{ 1827 try{ 1828 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1829 encoderICU.encode(CharBuffer.wrap("\ud800")); 1830 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1831 }catch (Exception e) { 1832 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1833 //e.printStackTrace(); 1834 } 1835 1836 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1837 try { 1838 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1839 errln("\\ud800 is OK for JDK utf-8 encoder"); 1840 } catch (Exception e) { 1841 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1842 //e.printStackTrace(); 1843 } 1844 } 1845 1846 @Test TestUTF16Bom()1847 public void TestUTF16Bom(){ 1848 1849 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1850 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1851 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 }; 1852 CharBuffer inBuf = CharBuffer.allocate(in.length); 1853 inBuf.put(in); 1854 CharsetEncoder encoder = cs.newEncoder(); 1855 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1856 inBuf.rewind(); 1857 encoder.encode(inBuf, outBuf, true); 1858 outBuf.rewind(); 1859 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1860 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1861 } 1862 while(outBuf.hasRemaining()){ 1863 logln("0x"+hex(outBuf.get())); 1864 } 1865 CharsetDecoder decoder = cs.newDecoder(); 1866 outBuf.rewind(); 1867 CharBuffer rt = CharBuffer.allocate(in.length); 1868 CoderResult cr = decoder.decode(outBuf, rt, true); 1869 if(cr.isError()){ 1870 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1871 } 1872 equals(rt, in); 1873 { 1874 rt.clear(); 1875 outBuf.rewind(); 1876 Charset utf16 = Charset.forName("UTF-16"); 1877 CharsetDecoder dc = utf16.newDecoder(); 1878 cr = dc.decode(outBuf, rt, true); 1879 equals(rt, in); 1880 } 1881 } 1882 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush)1883 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1884 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1885 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1886 } 1887 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray)1888 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1889 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1890 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1891 } 1892 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)1893 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1894 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) 1895 throws BufferOverflowException, Exception { 1896 ByteBuffer mySource; 1897 CharBuffer myTarget; 1898 if (backedByArray) { 1899 mySource = ByteBuffer.allocate(source.capacity()); 1900 myTarget = CharBuffer.allocate(target.capacity()); 1901 } else { 1902 // this does not guarantee by any means that mySource and myTarget 1903 // are not backed by arrays 1904 mySource = ByteBuffer.allocateDirect(source.capacity()); 1905 myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer(); 1906 } 1907 mySource.position(source.position()); 1908 for (int i = source.position(); i < source.limit(); i++) 1909 mySource.put(i, source.get(i)); 1910 1911 { 1912 decoder.reset(); 1913 myTarget.limit(target.limit()); 1914 mySource.limit(source.limit()); 1915 mySource.position(source.position()); 1916 CoderResult result = CoderResult.UNDERFLOW; 1917 result = decoder.decode(mySource, myTarget, true); 1918 if (flush) { 1919 result = decoder.flush(myTarget); 1920 } 1921 if (result.isError()) { 1922 if (throwException) { 1923 throw new Exception(); 1924 } 1925 errln("Test complete buffers while decoding failed. " + result.toString()); 1926 return; 1927 } 1928 if (result.isOverflow()) { 1929 if (throwException) { 1930 throw new BufferOverflowException(); 1931 } 1932 errln("Test complete buffers while decoding threw overflow exception"); 1933 return; 1934 } 1935 myTarget.limit(myTarget.position()); 1936 myTarget.position(0); 1937 target.position(0); 1938 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1939 errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed"); 1940 } 1941 } 1942 if (isQuick()) { 1943 return; 1944 } 1945 { 1946 decoder.reset(); 1947 myTarget.limit(target.position()); 1948 mySource.limit(source.position()); 1949 mySource.position(source.position()); 1950 myTarget.clear(); 1951 myTarget.position(0); 1952 1953 int inputLen = mySource.remaining(); 1954 1955 CoderResult result = CoderResult.UNDERFLOW; 1956 for (int i = 1; i <= inputLen; i++) { 1957 mySource.limit(i); 1958 if (i == inputLen) { 1959 result = decoder.decode(mySource, myTarget, true); 1960 } else { 1961 result = decoder.decode(mySource, myTarget, false); 1962 } 1963 if (result.isError()) { 1964 errln("Test small input buffers while decoding failed. " + result.toString()); 1965 break; 1966 } 1967 if (result.isOverflow()) { 1968 if (throwException) { 1969 throw new BufferOverflowException(); 1970 } 1971 errln("Test small input buffers while decoding threw overflow exception"); 1972 break; 1973 } 1974 1975 } 1976 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1977 errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed"); 1978 } 1979 } 1980 { 1981 decoder.reset(); 1982 myTarget.limit(0); 1983 mySource.limit(0); 1984 mySource.position(source.position()); 1985 myTarget.clear(); 1986 while (true) { 1987 CoderResult result = decoder.decode(mySource, myTarget, false); 1988 if (result.isUnderflow()) { 1989 if (mySource.limit() < source.limit()) 1990 mySource.limit(mySource.limit() + 1); 1991 } else if (result.isOverflow()) { 1992 if (myTarget.limit() < target.limit()) 1993 myTarget.limit(myTarget.limit() + 1); 1994 else 1995 break; 1996 } else /*if (result.isError())*/ { 1997 errln("Test small output buffers while decoding " + result.toString()); 1998 } 1999 if (mySource.position() == mySource.limit()) { 2000 result = decoder.decode(mySource, myTarget, true); 2001 if (result.isError()) { 2002 errln("Test small output buffers while decoding " + result.toString()); 2003 } 2004 result = decoder.flush(myTarget); 2005 if (result.isError()) { 2006 errln("Test small output buffers while decoding " + result.toString()); 2007 } 2008 break; 2009 } 2010 } 2011 2012 if (!equals(myTarget, target, targetLimit)) { 2013 errln("Test small output buffers " + encoding + " TO Unicode failed"); 2014 } 2015 } 2016 } 2017 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush)2018 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2019 boolean throwException, boolean flush) throws Exception, BufferOverflowException { 2020 smBufEncode(encoder, encoding, source, target, throwException, flush, true); 2021 } 2022 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray)2023 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2024 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2025 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2026 } 2027 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)2028 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2029 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2030 BufferOverflowException { 2031 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2032 2033 CharBuffer mySource; 2034 ByteBuffer myTarget; 2035 if (backedByArray) { 2036 mySource = CharBuffer.allocate(source.capacity()); 2037 myTarget = ByteBuffer.allocate(target.capacity()); 2038 } else { 2039 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2040 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2041 } 2042 mySource.position(source.position()); 2043 for (int i = source.position(); i < source.limit(); i++) 2044 mySource.put(i, source.get(i)); 2045 2046 myTarget.clear(); 2047 { 2048 logln("Running tests on small input buffers for " + encoding); 2049 encoder.reset(); 2050 myTarget.limit(target.limit()); 2051 mySource.limit(source.limit()); 2052 mySource.position(source.position()); 2053 CoderResult result = null; 2054 2055 result = encoder.encode(mySource, myTarget, true); 2056 if (flush) { 2057 result = encoder.flush(myTarget); 2058 } 2059 2060 if (result.isError()) { 2061 if (throwException) { 2062 throw new Exception(); 2063 } 2064 errln("Test complete while encoding failed. " + result.toString()); 2065 } 2066 if (result.isOverflow()) { 2067 if (throwException) { 2068 throw new BufferOverflowException(); 2069 } 2070 errln("Test complete while encoding threw overflow exception"); 2071 } 2072 if (!equals(myTarget, target, targetLimit)) { 2073 errln("Test complete buffers while encoding for " + encoding + " failed"); 2074 2075 } else { 2076 logln("Tests complete buffers for " + encoding + " passed"); 2077 } 2078 } 2079 if (isQuick()) { 2080 return; 2081 } 2082 { 2083 logln("Running tests on small input buffers for " + encoding); 2084 encoder.reset(); 2085 myTarget.clear(); 2086 myTarget.limit(target.limit()); 2087 mySource.limit(source.limit()); 2088 mySource.position(source.position()); 2089 int inputLen = mySource.limit(); 2090 CoderResult result = null; 2091 for (int i = 1; i <= inputLen; i++) { 2092 mySource.limit(i); 2093 result = encoder.encode(mySource, myTarget, false); 2094 if (result.isError()) { 2095 errln("Test small input buffers while encoding failed. " + result.toString()); 2096 } 2097 if (result.isOverflow()) { 2098 if (throwException) { 2099 throw new BufferOverflowException(); 2100 } 2101 errln("Test small input buffers while encoding threw overflow exception"); 2102 } 2103 } 2104 if (!equals(myTarget, target, targetLimit)) { 2105 errln("Test small input buffers " + encoding + " From Unicode failed"); 2106 } else { 2107 logln("Tests on small input buffers for " + encoding + " passed"); 2108 } 2109 } 2110 { 2111 logln("Running tests on small output buffers for " + encoding); 2112 encoder.reset(); 2113 myTarget.clear(); 2114 myTarget.limit(target.limit()); 2115 mySource.limit(source.limit()); 2116 mySource.position(source.position()); 2117 mySource.position(0); 2118 myTarget.position(0); 2119 2120 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2121 2122 while (true) { 2123 int pos = myTarget.position(); 2124 2125 CoderResult result = encoder.encode(mySource, myTarget, false); 2126 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2127 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2128 2129 if (result.isError()) { 2130 errln("Test small output buffers while encoding " + result.toString()); 2131 } 2132 if (mySource.position() == mySource.limit()) { 2133 result = encoder.encode(mySource, myTarget, true); 2134 if (result.isError()) { 2135 errln("Test small output buffers while encoding " + result.toString()); 2136 } 2137 2138 myTarget.limit(myTarget.capacity()); 2139 result = encoder.flush(myTarget); 2140 if (result.isError()) { 2141 errln("Test small output buffers while encoding " + result.toString()); 2142 } 2143 break; 2144 } 2145 } 2146 if (!equals(myTarget, target, targetLimit)) { 2147 errln("Test small output buffers " + encoding + " From Unicode failed."); 2148 } 2149 logln("Tests on small output buffers for " + encoding + " passed"); 2150 } 2151 } 2152 2153 2154 //TODO 2155 /* 2156 @Test 2157 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2158 try { 2159 { 2160 String source = uSource.toString(); 2161 byte[] target = source.getBytes(m_encoding); 2162 if (!equals(target, bSource.array())) { 2163 errln("encode using string API failed"); 2164 } 2165 } 2166 { 2167 2168 String target = new String(bSource.array(), m_encoding); 2169 if (!equals(uSource, target.toCharArray())) { 2170 errln("decode using string API failed"); 2171 } 2172 } 2173 } catch (Exception e) { 2174 //e.printStackTrace(); 2175 errln(e.getMessage()); 2176 } 2177 } 2178 2179 /*private void fromUnicodeTest() throws Exception { 2180 2181 logln("Loaded Charset: " + charset.getClass().toString()); 2182 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2183 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2184 2185 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2186 logln("Created ByteBuffer of length: " + uSource.length); 2187 CharBuffer mySource = CharBuffer.wrap(uSource); 2188 logln("Wrapped ByteBuffer with CharBuffer "); 2189 encoder.reset(); 2190 logln("Test Unicode to " + encoding ); 2191 encoder.encode(mySource, myTarget, true); 2192 if (!equals(myTarget, gbSource)) { 2193 errln("--Test Unicode to " + encoding + ": FAILED"); 2194 } 2195 logln("Test Unicode to " + encoding +" passed"); 2196 } 2197 2198 @Test 2199 public void TestToUnicode( ) throws Exception { 2200 2201 logln("Loaded Charset: " + charset.getClass().toString()); 2202 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2203 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2204 2205 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2206 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2207 decoder.reset(); 2208 CoderResult result = decoder.decode(mySource, myTarget, true); 2209 if (result.isError()) { 2210 errln("Test ToUnicode -- FAILED"); 2211 } 2212 if (!equals(myTarget, uSource)) { 2213 errln("--Test " + encoding + " to Unicode :FAILED"); 2214 } 2215 } 2216 2217 public static byte[] getByteArray(char[] source) { 2218 byte[] target = new byte[source.length]; 2219 int i = source.length; 2220 for (; --i >= 0;) { 2221 target[i] = (byte) source[i]; 2222 } 2223 return target; 2224 } 2225 /* 2226 private void smBufCharset(Charset charset) { 2227 try { 2228 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2229 CharBuffer uTarget = 2230 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2231 2232 if (!equals(uTarget, uSource)) { 2233 errln("Test " + charset.toString() + " to Unicode :FAILED"); 2234 } 2235 if (!equals(bTarget, gbSource)) { 2236 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2237 } 2238 } catch (Exception ex) { 2239 errln("Encountered exception in smBufCharset"); 2240 } 2241 } 2242 2243 @Test 2244 public void TestMultithreaded() throws Exception { 2245 final Charset cs = Charset.forName(encoding); 2246 if (cs == charset) { 2247 errln("The objects are equal"); 2248 } 2249 smBufCharset(cs); 2250 try { 2251 final Thread t1 = new Thread() { 2252 public void run() { 2253 // commented out since the mehtods on 2254 // Charset API are supposed to be thread 2255 // safe ... to test it we dont sync 2256 2257 // synchronized(charset){ 2258 while (!interrupted()) { 2259 try { 2260 smBufCharset(cs); 2261 } catch (UnsupportedCharsetException ueEx) { 2262 errln(ueEx.toString()); 2263 } 2264 } 2265 2266 // } 2267 } 2268 }; 2269 final Thread t2 = new Thread() { 2270 public void run() { 2271 // synchronized(charset){ 2272 while (!interrupted()) { 2273 try { 2274 smBufCharset(cs); 2275 } catch (UnsupportedCharsetException ueEx) { 2276 errln(ueEx.toString()); 2277 } 2278 } 2279 2280 //} 2281 } 2282 }; 2283 t1.start(); 2284 t2.start(); 2285 int i = 0; 2286 for (;;) { 2287 if (i > 1000000000) { 2288 try { 2289 t1.interrupt(); 2290 } catch (Exception e) { 2291 } 2292 try { 2293 t2.interrupt(); 2294 } catch (Exception e) { 2295 } 2296 break; 2297 } 2298 i++; 2299 } 2300 } catch (Exception e) { 2301 throw e; 2302 } 2303 } 2304 2305 @Test 2306 public void TestSynchronizedMultithreaded() throws Exception { 2307 // Methods on CharsetDecoder and CharsetEncoder classes 2308 // are inherently unsafe if accessed by multiple concurrent 2309 // thread so we synchronize them 2310 final Charset charset = Charset.forName(encoding); 2311 final CharsetDecoder decoder = charset.newDecoder(); 2312 final CharsetEncoder encoder = charset.newEncoder(); 2313 try { 2314 final Thread t1 = new Thread() { 2315 public void run() { 2316 while (!interrupted()) { 2317 try { 2318 synchronized (encoder) { 2319 smBufEncode(encoder, encoding); 2320 } 2321 synchronized (decoder) { 2322 smBufDecode(decoder, encoding); 2323 } 2324 } catch (UnsupportedCharsetException ueEx) { 2325 errln(ueEx.toString()); 2326 } 2327 } 2328 2329 } 2330 }; 2331 final Thread t2 = new Thread() { 2332 public void run() { 2333 while (!interrupted()) { 2334 try { 2335 synchronized (encoder) { 2336 smBufEncode(encoder, encoding); 2337 } 2338 synchronized (decoder) { 2339 smBufDecode(decoder, encoding); 2340 } 2341 } catch (UnsupportedCharsetException ueEx) { 2342 errln(ueEx.toString()); 2343 } 2344 } 2345 } 2346 }; 2347 t1.start(); 2348 t2.start(); 2349 int i = 0; 2350 for (;;) { 2351 if (i > 1000000000) { 2352 try { 2353 t1.interrupt(); 2354 } catch (Exception e) { 2355 } 2356 try { 2357 t2.interrupt(); 2358 } catch (Exception e) { 2359 } 2360 break; 2361 } 2362 i++; 2363 } 2364 } catch (Exception e) { 2365 throw e; 2366 } 2367 } 2368 */ 2369 2370 @Test TestMBCS()2371 public void TestMBCS(){ 2372 { 2373 // Encoder: from Unicode conversion 2374 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2375 ByteBuffer out = ByteBuffer.allocate(6); 2376 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2377 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2378 if(!result.isError()){ 2379 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2380 if(!equals(expected, out.array())){ 2381 errln("Did not get the expected result for substitution bytes. Got: "+ 2382 hex(out.array())); 2383 } 2384 logln("Output: "+ hex(out.array())); 2385 }else{ 2386 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2387 } 2388 } 2389 { 2390 // Decoder: to Unicode conversion 2391 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2392 CharBuffer out = CharBuffer.allocate(3); 2393 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2394 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2395 if(!result.isError()){ 2396 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2397 if(!equals(expected, out.array())){ 2398 errln("Did not get the expected result for substitution chars. Got: "+ 2399 hex(out.array())); 2400 } 2401 logln("Output: "+ hex(out.array())); 2402 }else{ 2403 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2404 } 2405 } 2406 } 2407 2408 @Test TestJB4897()2409 public void TestJB4897(){ 2410 CharsetProviderICU provider = new CharsetProviderICU(); 2411 Charset charset = provider.charsetForName("x-abracadabra"); 2412 if(charset!=null && charset.canEncode()== true){ 2413 errln("provider.charsetForName() does not validate the charset names" ); 2414 } 2415 } 2416 2417 @Test TestJB5027()2418 public void TestJB5027() { 2419 CharsetProviderICU provider= new CharsetProviderICU(); 2420 2421 Charset fake = provider.charsetForName("doesNotExist"); 2422 if(fake != null){ 2423 errln("\"doesNotExist\" returned " + fake); 2424 } 2425 Charset xfake = provider.charsetForName("x-doesNotExist"); 2426 if(xfake!=null){ 2427 errln("\"x-doesNotExist\" returned " + xfake); 2428 } 2429 } 2430 2431 //test to make sure that number of aliases and canonical names are in the charsets that are in 2432 @Test TestAllNames()2433 public void TestAllNames() { 2434 2435 CharsetProviderICU provider= new CharsetProviderICU(); 2436 Object[] available = CharsetProviderICU.getAvailableNames(); 2437 for(int i=0; i<available.length;i++){ 2438 try{ 2439 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2440 2441 // ',' is not allowed by Java's charset name checker 2442 if(canon.indexOf(',')>=0){ 2443 continue; 2444 } 2445 Charset cs = provider.charsetForName((String)available[i]); 2446 2447 Object[] javaAliases = cs.aliases().toArray(); 2448 //seach for ICU canonical name in javaAliases 2449 boolean inAliasList = false; 2450 for(int j=0; j<javaAliases.length; j++){ 2451 String java = (String) javaAliases[j]; 2452 if(java.equals(canon)){ 2453 logln("javaAlias: " + java + " canon: " + canon); 2454 inAliasList = true; 2455 } 2456 } 2457 if(inAliasList == false){ 2458 errln("Could not find ICU canonical name: "+canon+ " for java canonical name: "+ available[i]+ " "+ i); 2459 } 2460 }catch(UnsupportedCharsetException ex){ 2461 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2462 continue; 2463 } 2464 } 2465 } 2466 2467 @Test TestDecoderImplFlush()2468 public void TestDecoderImplFlush() { 2469 CharsetProviderICU provider = new CharsetProviderICU(); 2470 Charset ics = provider.charsetForName("UTF-16"); 2471 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2472 execDecoder(jcs); 2473 execDecoder(ics); 2474 } 2475 2476 @Test TestEncoderImplFlush()2477 public void TestEncoderImplFlush() { 2478 CharsetProviderICU provider = new CharsetProviderICU(); 2479 Charset ics = provider.charsetForName("UTF-16"); 2480 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2481 execEncoder(jcs); 2482 execEncoder(ics); 2483 } execDecoder(Charset cs)2484 private void execDecoder(Charset cs){ 2485 CharsetDecoder decoder = cs.newDecoder(); 2486 decoder.onMalformedInput(CodingErrorAction.REPORT); 2487 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2488 CharBuffer out = CharBuffer.allocate(10); 2489 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2490 -2, 32, 0, 98 }), out, false); 2491 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2492 2493 logln(cs.getClass().toString()+ ":" +result.toString()); 2494 try { 2495 result = decoder.flush(out); 2496 logln(cs.getClass().toString()+ ":" +result.toString()); 2497 } catch (Exception e) { 2498 errln(e.getMessage()+" "+cs.getClass().toString()); 2499 } 2500 } execEncoder(Charset cs)2501 private void execEncoder(Charset cs){ 2502 CharsetEncoder encoder = cs.newEncoder(); 2503 encoder.onMalformedInput(CodingErrorAction.REPORT); 2504 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2505 ByteBuffer out = ByteBuffer.allocate(10); 2506 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2507 '\u2345', 32, 98 }), out, false); 2508 logln(cs.getClass().toString()+ ":" +result.toString()); 2509 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2510 2511 logln(cs.getClass().toString()+ ":" +result.toString()); 2512 try { 2513 result = encoder.flush(out); 2514 logln(cs.getClass().toString()+ ":" +result.toString()); 2515 } catch (Exception e) { 2516 errln(e.getMessage()+" "+cs.getClass().toString()); 2517 } 2518 } 2519 2520 @Test TestDecodeMalformed()2521 public void TestDecodeMalformed() { 2522 CharsetProviderICU provider = new CharsetProviderICU(); 2523 Charset ics = provider.charsetForName("UTF-16BE"); 2524 //Use SUN's charset 2525 Charset jcs = Charset.forName("UTF-16"); 2526 CoderResult ir = execMalformed(ics); 2527 CoderResult jr = execMalformed(jcs); 2528 if(ir!=jr){ 2529 errln("ICU's decoder did not return the same result as Sun. ICU: "+ir.toString()+" Sun: "+jr.toString()); 2530 } 2531 } 2532 execMalformed(Charset cs)2533 private CoderResult execMalformed(Charset cs){ 2534 CharsetDecoder decoder = cs.newDecoder(); 2535 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2536 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2537 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2538 CharBuffer out = CharBuffer.allocate(3); 2539 return decoder.decode(in, out, true); 2540 } 2541 2542 @Test TestJavaUTF16Decoder()2543 public void TestJavaUTF16Decoder(){ 2544 CharsetProviderICU provider = new CharsetProviderICU(); 2545 Charset ics = provider.charsetForName("UTF-16BE"); 2546 //Use SUN's charset 2547 Charset jcs = Charset.forName("UTF-16"); 2548 Exception ie = execConvertAll(ics); 2549 Exception je = execConvertAll(jcs); 2550 if(ie!=je){ 2551 errln("ICU's decoder did not return the same result as Sun. ICU: "+ie.toString()+" Sun: "+je.toString()); 2552 } 2553 } execConvertAll(Charset cs)2554 private Exception execConvertAll(Charset cs){ 2555 ByteBuffer in = ByteBuffer.allocate(400); 2556 int i=0; 2557 while(in.position()!=in.capacity()){ 2558 in.put((byte)0xD8); 2559 in.put((byte)i); 2560 in.put((byte)0xDC); 2561 in.put((byte)i); 2562 i++; 2563 } 2564 in.limit(in.position()); 2565 in.position(0); 2566 CharsetDecoder decoder = cs.newDecoder(); 2567 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2568 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2569 try{ 2570 CharBuffer out = decoder.decode(in); 2571 if(out!=null){ 2572 logln(cs.toString()+" encoing succeeded as expected!"); 2573 } 2574 }catch ( Exception ex){ 2575 errln("Did not get expected exception for encoding: "+cs.toString()); 2576 return ex; 2577 } 2578 return null; 2579 } 2580 2581 @Test TestUTF32BOM()2582 public void TestUTF32BOM(){ 2583 2584 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2585 char[] in = new char[] { 0xd800, 0xdc00, 2586 0xd801, 0xdc01, 2587 0xdbff, 0xdfff, 2588 0xd900, 0xdd00, 2589 0x0000, 0x0041, 2590 0x0000, 0x0042, 2591 0x0000, 0x0043}; 2592 2593 CharBuffer inBuf = CharBuffer.allocate(in.length); 2594 inBuf.put(in); 2595 CharsetEncoder encoder = cs.newEncoder(); 2596 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2597 inBuf.rewind(); 2598 encoder.encode(inBuf, outBuf, true); 2599 outBuf.rewind(); 2600 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2601 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2602 errln("The UTF32 encoder did not appended bom. Length returned: " + outBuf.remaining()); 2603 } 2604 while(outBuf.hasRemaining()){ 2605 logln("0x"+hex(outBuf.get())); 2606 } 2607 CharsetDecoder decoder = cs.newDecoder(); 2608 outBuf.limit(outBuf.position()); 2609 outBuf.rewind(); 2610 CharBuffer rt = CharBuffer.allocate(in.length); 2611 CoderResult cr = decoder.decode(outBuf, rt, true); 2612 if(cr.isError()){ 2613 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2614 } 2615 equals(rt, in); 2616 try{ 2617 rt.clear(); 2618 outBuf.rewind(); 2619 Charset utf16 = Charset.forName("UTF-32"); 2620 CharsetDecoder dc = utf16.newDecoder(); 2621 cr = dc.decode(outBuf, rt, true); 2622 equals(rt, in); 2623 }catch(UnsupportedCharsetException ex){ 2624 // swallow the expection. 2625 } 2626 } 2627 2628 /* 2629 * Michael Ow 2630 * Modified 070424 2631 */ 2632 /*The following two methods provides the option of exceptions when Decoding 2633 * and Encoding if needed for testing purposes. 2634 */ smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target)2635 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2636 smBufDecode(decoder, encoding, source, target, true); 2637 } smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray)2638 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2639 try { 2640 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2641 } 2642 catch (Exception ex) { 2643 System.out.println("!exception!"); 2644 } 2645 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target)2646 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2647 smBufEncode(encoder, encoding, source, target, true); 2648 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray)2649 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2650 try { 2651 smBufEncode(encoder, encoding, source, target, false, false); 2652 } 2653 catch (Exception ex) { 2654 System.out.println("!exception!"); 2655 } 2656 } 2657 2658 //Test CharsetICUProvider 2659 @Test TestNullCanonicalName()2660 public void TestNullCanonicalName() { 2661 String enc = null; 2662 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2663 2664 if (canonicalName != null) { 2665 errln("getICUCanonicalName return a non-null string for given null string"); 2666 } 2667 } 2668 2669 @Test TestGetAllNames()2670 public void TestGetAllNames() { 2671 String[] names = null; 2672 2673 names = CharsetProviderICU.getAllNames(); 2674 2675 if (names == null) { 2676 errln("getAllNames returned a null string."); 2677 } 2678 } 2679 2680 //Test CharsetICU 2681 @Test TestCharsetContains()2682 public void TestCharsetContains() { 2683 boolean test; 2684 2685 CharsetProvider provider = new CharsetProviderICU(); 2686 Charset cs1 = provider.charsetForName("UTF-32"); 2687 Charset cs2 = null; 2688 2689 test = cs1.contains(cs2); 2690 2691 if (test != false) { 2692 errln("Charset.contains returned true for a null charset."); 2693 } 2694 2695 cs2 = CharsetICU.forNameICU("UTF-32"); 2696 2697 test = cs1.contains(cs2); 2698 2699 if (test != true) { 2700 errln("Charset.contains returned false for an identical charset."); 2701 } 2702 2703 cs2 = provider.charsetForName("UTF-8"); 2704 2705 test = cs1.contains(cs2); 2706 2707 if (test != false) { 2708 errln("Charset.contains returned true for a different charset."); 2709 } 2710 } 2711 2712 @Test TestCharsetICUNullCharsetName()2713 public void TestCharsetICUNullCharsetName() { 2714 String charsetName = null; 2715 2716 try { 2717 CharsetICU.forNameICU(charsetName); 2718 errln("CharsetICU.forName should have thown an exception after getting a null charsetName."); 2719 } 2720 catch(Exception ex) { 2721 } 2722 } 2723 2724 //Test CharsetASCII 2725 @Test TestCharsetASCIIOverFlow()2726 public void TestCharsetASCIIOverFlow() { 2727 int byteBufferLimit; 2728 int charBufferLimit; 2729 2730 CharsetProvider provider = new CharsetProviderICU(); 2731 Charset cs = provider.charsetForName("ASCII"); 2732 CharsetEncoder encoder = cs.newEncoder(); 2733 CharsetDecoder decoder = cs.newDecoder(); 2734 2735 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2736 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2737 2738 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2739 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2740 2741 for(int j=0;j<=0x7f; j++){ 2742 charBuffer.put((char)j); 2743 byteBuffer.put((byte)j); 2744 } 2745 2746 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2747 byteBuffer.position(0); 2748 charBuffer.limit(charBufferLimit = charBuffer.position()); 2749 charBuffer.position(0); 2750 2751 //test for overflow 2752 byteBufferTest.limit(byteBufferLimit - 5); 2753 byteBufferTest.position(0); 2754 charBufferTest.limit(charBufferLimit - 5); 2755 charBufferTest.position(0); 2756 try { 2757 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2758 errln("Overflow exception while decoding ASCII should have been thrown."); 2759 } 2760 catch(Exception ex) { 2761 } 2762 try { 2763 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2764 errln("Overflow exception while encoding ASCII should have been thrown."); 2765 } 2766 catch (Exception ex) { 2767 } 2768 2769 // For better code coverage 2770 /* For better code coverage */ 2771 byte byteout[] = { 2772 (byte)0x01 2773 }; 2774 char charin[] = { 2775 (char)0x0001, (char)0x0002 2776 }; 2777 ByteBuffer bb = ByteBuffer.wrap(byteout); 2778 CharBuffer cb = CharBuffer.wrap(charin); 2779 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2780 // which makes code compiled for a newer JDK not run on an older one. 2781 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2782 encoder.reset(); 2783 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2784 errln("Overflow error while encoding ASCII should have occurred."); 2785 } 2786 } 2787 2788 //Test CharsetUTF7 2789 @Test TestCharsetUTF7()2790 public void TestCharsetUTF7() { 2791 CoderResult result = CoderResult.UNDERFLOW; 2792 CharsetProvider provider = new CharsetProviderICU(); 2793 Charset cs = provider.charsetForName("UTF-7"); 2794 CharsetEncoder encoder = cs.newEncoder(); 2795 CharsetDecoder decoder = cs.newDecoder(); 2796 2797 CharBuffer us = CharBuffer.allocate(0x100); 2798 ByteBuffer bs = ByteBuffer.allocate(0x100); 2799 2800 /* Unicode : A<not equal to Alpha Lamda>. */ 2801 /* UTF7: AImIDkQ. */ 2802 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2803 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2804 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2805 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2806 2807 bs.limit(bs.position()); 2808 bs.position(0); 2809 us.limit(us.position()); 2810 us.position(0); 2811 2812 smBufDecode(decoder, "UTF-7", bs, us); 2813 smBufEncode(encoder, "UTF-7", us, bs); 2814 2815 /* Testing UTF-7 toUnicode with substitute callbacks */ 2816 { 2817 byte [] bytesTestErrorConsumption = { 2818 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 2819 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2820 2821 }; 2822 char [] unicodeTestErrorConsumption = { 2823 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2824 }; 2825 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2826 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2827 2828 CodingErrorAction savedMal = decoder.malformedInputAction(); 2829 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2830 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2831 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2832 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2833 decoder.onMalformedInput(savedMal); 2834 decoder.onUnmappableCharacter(savedUMap); 2835 } 2836 /* ticket 6151 */ 2837 CharBuffer smallus = CharBuffer.allocate(1); 2838 ByteBuffer bigbs = ByteBuffer.allocate(3); 2839 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2840 bigbs.position(0); 2841 try { 2842 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2843 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2844 } catch (Exception ex) { 2845 } 2846 2847 //The rest of the code in this method is to provide better code coverage 2848 CharBuffer ccus = CharBuffer.allocate(0x10); 2849 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2850 2851 //start of charset decoder code coverage code 2852 //test for accurate illegal and control character checking 2853 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2854 ccus.put((char)0x0000); 2855 2856 ccbs.limit(ccbs.position()); 2857 ccbs.position(0); 2858 ccus.limit(ccus.position()); 2859 ccus.position(0); 2860 2861 try { 2862 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2863 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2864 } 2865 catch (Exception ex) { 2866 } 2867 2868 ccbs.clear(); 2869 ccus.clear(); 2870 2871 //test for illegal base64 character 2872 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2873 ccus.put((char)0x0000); 2874 2875 ccbs.limit(ccbs.position()); 2876 ccbs.position(0); 2877 ccus.limit(ccus.position()); 2878 ccus.position(0); 2879 2880 try { 2881 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2882 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2883 } 2884 catch (Exception ex) { 2885 } 2886 2887 ccbs.clear(); 2888 ccus.clear(); 2889 2890 //test for illegal order of the base64 character sequence 2891 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2892 ccus.put((char)0x0000); ccus.put((char)0x0000); 2893 2894 ccbs.limit(ccbs.position()); 2895 ccbs.position(0); 2896 ccus.limit(ccus.position()); 2897 ccus.position(0); 2898 2899 try { 2900 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2901 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2902 } 2903 catch (Exception ex) { 2904 } 2905 2906 ccbs.clear(); 2907 ccus.clear(); 2908 2909 //test for illegal order of the base64 character sequence 2910 ccbs.put((byte)0x2b); ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2911 ccus.put((char)0x0000); 2912 2913 ccbs.limit(ccbs.position()); 2914 ccbs.position(0); 2915 ccus.limit(ccus.position()); 2916 ccus.position(0); 2917 2918 try { 2919 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2920 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2921 } 2922 catch (Exception ex) { 2923 } 2924 2925 ccbs.clear(); 2926 ccus.clear(); 2927 2928 //test for illegal order of the base64 character sequence 2929 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2930 ccus.put((char)0x0000); 2931 2932 ccbs.limit(ccbs.position()); 2933 ccbs.position(0); 2934 ccus.limit(ccus.position()); 2935 ccus.position(0); 2936 2937 try { 2938 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2939 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2940 } 2941 catch (Exception ex) { 2942 } 2943 2944 ccbs.clear(); 2945 ccus.clear(); 2946 2947 //test for illegal order of the base64 character sequence 2948 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 2949 ccus.put((char)0x0000); 2950 2951 ccbs.limit(ccbs.position()); 2952 ccbs.position(0); 2953 ccus.limit(ccus.position()); 2954 ccus.position(0); 2955 2956 try { 2957 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 2958 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2959 } 2960 catch (Exception ex) { 2961 } 2962 2963 ccbs.clear(); 2964 ccus.clear(); 2965 2966 //test for overflow buffer error 2967 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 2968 2969 ccbs.limit(ccbs.position()); 2970 ccbs.position(0); 2971 ccus.limit(0); 2972 ccus.position(0); 2973 2974 try { 2975 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 2976 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2977 } 2978 catch (Exception ex) { 2979 } 2980 2981 ccbs.clear(); 2982 ccus.clear(); 2983 2984 //test for overflow buffer error 2985 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 2986 2987 ccbs.limit(ccbs.position()); 2988 ccbs.position(0); 2989 ccus.limit(0); 2990 ccus.position(0); 2991 2992 try { 2993 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 2994 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2995 } 2996 catch (Exception ex) { 2997 } 2998 //end of charset decoder code coverage code 2999 3000 //start of charset encoder code coverage code 3001 ccbs.clear(); 3002 ccus.clear(); 3003 //test for overflow buffer error 3004 ccus.put((char)0x002b); 3005 ccbs.put((byte)0x2b); 3006 3007 ccbs.limit(ccbs.position()); 3008 ccbs.position(0); 3009 ccus.limit(ccus.position()); 3010 ccus.position(0); 3011 3012 try { 3013 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3014 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3015 } 3016 catch (Exception ex) { 3017 } 3018 3019 ccbs.clear(); 3020 ccus.clear(); 3021 3022 //test for overflow buffer error 3023 ccus.put((char)0x002b); ccus.put((char)0x2262); 3024 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3025 3026 ccbs.limit(ccbs.position()); 3027 ccbs.position(0); 3028 ccus.limit(ccus.position()); 3029 ccus.position(0); 3030 3031 try { 3032 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3033 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3034 } 3035 catch (Exception ex) { 3036 } 3037 3038 ccbs.clear(); 3039 ccus.clear(); 3040 3041 //test for overflow buffer error 3042 ccus.put((char)0x2262); ccus.put((char)0x0049); 3043 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3044 ccbs.limit(ccbs.position()); 3045 ccbs.position(0); 3046 ccus.limit(ccus.position()); 3047 ccus.position(0); 3048 3049 try { 3050 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3051 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3052 } 3053 catch (Exception ex) { 3054 } 3055 3056 ccbs.clear(); 3057 ccus.clear(); 3058 3059 //test for overflow buffer error 3060 ccus.put((char)0x2262); ccus.put((char)0x0395); 3061 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3062 ccbs.limit(ccbs.position()); 3063 ccbs.position(0); 3064 ccus.limit(ccus.position()); 3065 ccus.position(0); 3066 3067 try { 3068 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3069 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3070 } 3071 catch (Exception ex) { 3072 } 3073 3074 ccbs.clear(); 3075 ccus.clear(); 3076 3077 //test for overflow buffer error 3078 ccus.put((char)0x2262); ccus.put((char)0x0395); 3079 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3080 ccbs.limit(ccbs.position()); 3081 ccbs.position(0); 3082 ccus.limit(ccus.position()); 3083 ccus.position(0); 3084 3085 try { 3086 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3087 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3088 } 3089 catch (Exception ex) { 3090 } 3091 3092 ccbs.clear(); 3093 ccus.clear(); 3094 3095 //test for overflow buffer error 3096 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3097 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3098 ccbs.limit(ccbs.position()); 3099 ccbs.position(0); 3100 ccus.limit(ccus.position()); 3101 ccus.position(0); 3102 3103 try { 3104 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3105 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3106 } 3107 catch (Exception ex) { 3108 } 3109 3110 ccbs.clear(); 3111 ccus.clear(); 3112 3113 //test for overflow buffer error 3114 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3115 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3116 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3117 ccbs.limit(ccbs.position()); 3118 ccbs.position(0); 3119 ccus.limit(ccus.position()); 3120 ccus.position(0); 3121 3122 try { 3123 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3124 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3125 } 3126 catch (Exception ex) { 3127 } 3128 3129 ccbs.clear(); 3130 ccus.clear(); 3131 3132 //test for overflow buffer error 3133 ccus.put((char)0x0049); ccus.put((char)0x0048); 3134 ccbs.put((byte)0x00); 3135 ccbs.limit(ccbs.position()); 3136 ccbs.position(0); 3137 ccus.limit(ccus.position()); 3138 ccus.position(0); 3139 3140 try { 3141 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3142 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3143 } 3144 catch (Exception ex) { 3145 } 3146 3147 ccbs.clear(); 3148 ccus.clear(); 3149 3150 //test for overflow buffer error 3151 ccus.put((char)0x2262); 3152 ccbs.put((byte)0x00); 3153 ccbs.limit(ccbs.position()); 3154 ccbs.position(0); 3155 ccus.limit(ccus.position()); 3156 ccus.position(0); 3157 3158 try { 3159 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3160 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3161 } 3162 catch (Exception ex) { 3163 } 3164 3165 ccbs.clear(); 3166 ccus.clear(); 3167 3168 //test for overflow buffer error 3169 ccus.put((char)0x2262); ccus.put((char)0x0049); 3170 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3171 ccbs.limit(ccbs.position()); 3172 ccbs.position(0); 3173 ccus.limit(ccus.position()); 3174 ccus.position(0); 3175 3176 try { 3177 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, ccbs, true, false); 3178 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3179 } 3180 catch (Exception ex) { 3181 } 3182 3183 ccbs.clear(); 3184 ccus.clear(); 3185 3186 //test for overflow buffer error 3187 ccus.put((char)0x2262); 3188 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3189 3190 ccbs.limit(ccbs.position()); 3191 ccbs.position(0); 3192 ccus.limit(ccus.position()); 3193 ccus.position(0); 3194 try { 3195 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3196 } catch (Exception ex) { 3197 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 3198 } 3199 3200 ccbs.clear(); 3201 ccus.clear(); 3202 3203 //test for overflow buffer error 3204 encoder.reset(); 3205 ccus.put((char)0x3980); ccus.put((char)0x2715); 3206 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3207 3208 ccbs.limit(ccbs.position()); 3209 ccbs.position(0); 3210 ccus.limit(ccus.position()); 3211 ccus.position(0); 3212 3213 result = encoder.encode(ccus, ccbs, true); 3214 result = encoder.flush(ccbs); 3215 if (!result.isOverflow()) { 3216 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3217 } 3218 //end of charset encoder code coverage code 3219 } 3220 3221 @Test TestBug12956()3222 public void TestBug12956() { 3223 final CharsetProvider provider = new CharsetProviderICU(); 3224 final Charset cs_utf7 = provider.charsetForName("UTF-7"); 3225 final Charset cs_imap = provider.charsetForName("IMAP-mailbox-name"); 3226 final String test = "新"; 3227 final byte[] expected_utf7 = {0x2b, 0x5a, 0x62, 0x41, 0x2d}; 3228 final byte[] expected_imap = {0x26, 0x5a, 0x62, 0x41, 0x2d}; 3229 3230 byte[] bytes = test.getBytes(cs_utf7); 3231 if (!Arrays.equals(bytes, expected_utf7)) { 3232 errln("Incorrect UTF-7 conversion. Got " + new String(bytes) + " but expect " + 3233 new String(expected_utf7)); 3234 } 3235 3236 bytes = test.getBytes(cs_imap); 3237 if (!Arrays.equals(bytes, expected_imap)) { 3238 errln("Incorrect IMAP-mailbox-name conversion. Got " + new String(bytes) + 3239 " but expect " + new String(expected_imap)); 3240 } 3241 } 3242 3243 //Test Charset ISCII 3244 @Test TestCharsetISCII()3245 public void TestCharsetISCII() { 3246 CharsetProvider provider = new CharsetProviderICU(); 3247 Charset cs = provider.charsetForName("ISCII,version=0"); 3248 CharsetEncoder encoder = cs.newEncoder(); 3249 CharsetDecoder decoder = cs.newDecoder(); 3250 3251 CharBuffer us = CharBuffer.allocate(0x100); 3252 ByteBuffer bs = ByteBuffer.allocate(0x100); 3253 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3254 3255 //test full range of Devanagari 3256 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3257 us.put((char)0x0908); us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3258 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3259 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3260 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3261 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3262 us.put((char)0x0927); us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3263 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3264 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3265 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3266 us.put((char)0x0941); us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3267 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3268 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3269 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3270 3271 bs.put((byte)0xEF); bs.put((byte)0x42); 3272 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3273 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); bs.put((byte)0xAB); bs.put((byte)0xAC); 3274 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3275 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3276 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3277 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3278 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3279 bs.put((byte)0xCB); bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3280 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3281 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3282 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3283 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3284 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3285 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3286 3287 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3288 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3289 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3290 bsr.put((byte)0xB3); bsr.put((byte)0xB4); bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3291 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3292 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3293 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3294 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3295 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3296 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3297 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3298 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3299 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); bsr.put((byte)0xF4); 3300 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3301 3302 //test Soft Halant 3303 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3304 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3305 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3306 3307 //test explicit halant 3308 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3309 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3310 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3311 3312 //test double danda 3313 us.put((char)0x0965); 3314 bs.put((byte)0xEA); bs.put((byte)0xEA); 3315 bsr.put((byte)0xEA); bsr.put((byte)0xEA); 3316 3317 //test ASCII 3318 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3319 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3320 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3321 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3322 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3323 3324 bs.put((byte)0x1B); bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3325 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3326 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3327 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3328 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3329 3330 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3331 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3332 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3333 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3334 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3335 3336 //test from Lotus 3337 //Some of the Lotus ISCII code points have been changed or commented out. 3338 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3339 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3340 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3341 us.put((char)0x095F); 3342 bs.put((byte)0x61); bs.put((byte)0xB3); 3343 bs.put((byte)0x0D); bs.put((byte)0x0A); 3344 bs.put((byte)0xEF); bs.put((byte)0x42); 3345 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3346 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3347 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3348 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3349 bs.put((byte)0xCE); 3350 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3351 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3352 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3353 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3354 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3355 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3356 bsr.put((byte)0xCE); 3357 //end of test from Lotus 3358 3359 //tamil range 3360 us.put((char)0x0B86); us.put((char)0x0B87); us.put((char)0x0B88); 3361 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3362 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3363 3364 //telugu range 3365 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3366 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3367 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3368 3369 //kannada range 3370 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3371 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3372 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3373 3374 //test Abbr sign and Anudatta 3375 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3376 us.put((char)0x0961); us.put((char)0x0963); us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3377 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3378 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3379 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3380 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3381 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3382 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3383 bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3384 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3385 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3386 //bs.put((byte)0xEF); bs.put((byte)0x30); 3387 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3388 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3389 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3390 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3391 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3392 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); bsr.put((byte)0xE9); 3393 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3394 3395 bs.limit(bs.position()); 3396 bs.position(0); 3397 us.limit(us.position()); 3398 us.position(0); 3399 bsr.limit(bsr.position()); 3400 bsr.position(0); 3401 3402 //round trip test 3403 try { 3404 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3405 smBufEncode(encoder, "ISCII-part2", us, bs); 3406 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3407 } catch (Exception ex) { 3408 errln("ISCII round trip test failed."); 3409 } 3410 3411 //Test new characters in the ISCII charset 3412 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3413 decoder = provider.charsetForName("ISCII,version=0").newDecoder(); 3414 char u_pts[] = { 3415 /* DEV */ (char)0x0904, 3416 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3417 }; 3418 byte b_pts[] = { 3419 (byte)0xef, (byte)0x42, 3420 /* DEV */ (byte)0xa4, (byte)0xe0, 3421 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3422 }; 3423 us = CharBuffer.allocate(u_pts.length); 3424 bs = ByteBuffer.allocate(b_pts.length); 3425 us.put(u_pts); 3426 bs.put(b_pts); 3427 3428 bs.limit(bs.position()); 3429 bs.position(0); 3430 us.limit(us.position()); 3431 us.position(0); 3432 3433 try { 3434 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3435 bs.position(0); 3436 us.position(0); 3437 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3438 } catch (Exception ex) { 3439 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3440 } 3441 3442 //The rest of the code in this method is to provide better code coverage 3443 CharBuffer ccus = CharBuffer.allocate(0x10); 3444 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3445 3446 //start of charset decoder code coverage code 3447 //test overflow buffer 3448 ccbs.put((byte)0x49); 3449 3450 ccbs.limit(ccbs.position()); 3451 ccbs.position(0); 3452 ccus.limit(0); 3453 ccus.position(0); 3454 3455 try { 3456 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3457 errln("Exception while decoding ISCII should have been thrown."); 3458 } 3459 catch (Exception ex) { 3460 } 3461 3462 ccbs.clear(); 3463 ccus.clear(); 3464 3465 //test atr overflow buffer 3466 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3467 ccus.put((char)0x00); 3468 3469 ccbs.limit(ccbs.position()); 3470 ccbs.position(0); 3471 ccus.limit(ccus.position()); 3472 ccus.position(0); 3473 3474 try { 3475 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3476 errln("Exception while decoding ISCII should have been thrown."); 3477 } 3478 catch (Exception ex) { 3479 } 3480 3481 //end of charset decoder code coverage code 3482 3483 ccbs.clear(); 3484 ccus.clear(); 3485 3486 //start of charset encoder code coverage code 3487 //test ascii overflow buffer 3488 ccus.put((char)0x41); 3489 3490 ccus.limit(ccus.position()); 3491 ccus.position(0); 3492 ccbs.limit(0); 3493 ccbs.position(0); 3494 3495 try { 3496 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3497 errln("Exception while encoding ISCII should have been thrown."); 3498 } 3499 catch (Exception ex) { 3500 } 3501 3502 ccbs.clear(); 3503 ccus.clear(); 3504 3505 //test ascii overflow buffer 3506 ccus.put((char)0x0A); ccus.put((char)0x0043); 3507 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3508 3509 ccus.limit(ccus.position()); 3510 ccus.position(0); 3511 ccbs.limit(ccbs.position()); 3512 ccbs.position(0); 3513 3514 try { 3515 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3516 errln("Exception while encoding ISCII should have been thrown."); 3517 } 3518 catch (Exception ex) { 3519 } 3520 3521 ccbs.clear(); 3522 ccus.clear(); 3523 3524 //test surrogate malform 3525 ccus.put((char)0x06E3); 3526 ccbs.put((byte)0x00); 3527 3528 ccus.limit(ccus.position()); 3529 ccus.position(0); 3530 ccbs.limit(ccbs.position()); 3531 ccbs.position(0); 3532 3533 try { 3534 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3535 errln("Exception while encoding ISCII should have been thrown."); 3536 } 3537 catch (Exception ex) { 3538 } 3539 3540 ccbs.clear(); 3541 ccus.clear(); 3542 3543 //test surrogate malform 3544 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3545 ccbs.put((byte)0x00); 3546 3547 ccus.limit(ccus.position()); 3548 ccus.position(0); 3549 ccbs.limit(ccbs.position()); 3550 ccbs.position(0); 3551 3552 try { 3553 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3554 errln("Exception while encoding ISCII should have been thrown."); 3555 } 3556 catch (Exception ex) { 3557 } 3558 3559 ccbs.clear(); 3560 ccus.clear(); 3561 3562 //test trail surrogate malform 3563 ccus.put((char)0xDD01); 3564 ccbs.put((byte)0x00); 3565 3566 ccus.limit(ccus.position()); 3567 ccus.position(0); 3568 ccbs.limit(ccbs.position()); 3569 ccbs.position(0); 3570 3571 try { 3572 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3573 errln("Exception while encoding ISCII should have been thrown."); 3574 } 3575 catch (Exception ex) { 3576 } 3577 3578 ccbs.clear(); 3579 ccus.clear(); 3580 3581 //test lead surrogates malform 3582 ccus.put((char)0xD801); ccus.put((char)0xD802); 3583 ccbs.put((byte)0x00); 3584 3585 ccus.limit(ccus.position()); 3586 ccus.position(0); 3587 ccbs.limit(ccbs.position()); 3588 ccbs.position(0); 3589 3590 try { 3591 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3592 errln("Exception while encoding ISCII should have been thrown."); 3593 } 3594 catch (Exception ex) { 3595 } 3596 3597 ccus.clear(); 3598 ccbs.clear(); 3599 3600 //test overflow buffer 3601 ccus.put((char)0x0901); 3602 ccbs.put((byte)0x00); 3603 3604 ccus.limit(ccus.position()); 3605 ccus.position(0); 3606 ccbs.limit(ccbs.position()); 3607 ccbs.position(0); 3608 3609 cs = provider.charsetForName("ISCII,version=0"); 3610 encoder = cs.newEncoder(); 3611 3612 try { 3613 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3614 errln("Exception while encoding ISCII should have been thrown."); 3615 } 3616 catch (Exception ex) { 3617 } 3618 //end of charset encoder code coverage code 3619 } 3620 3621 //Test for the IMAP Charset 3622 @Test TestCharsetIMAP()3623 public void TestCharsetIMAP() { 3624 CharsetProvider provider = new CharsetProviderICU(); 3625 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3626 CharsetEncoder encoder = cs.newEncoder(); 3627 CharsetDecoder decoder = cs.newDecoder(); 3628 3629 CharBuffer us = CharBuffer.allocate(0x20); 3630 ByteBuffer bs = ByteBuffer.allocate(0x20); 3631 3632 us.put((char)0x00A3); us.put((char)0x2020); us.put((char)0x41); 3633 3634 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); bs.put((byte)0x67); bs.put((byte)0x49); 3635 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3636 3637 3638 bs.limit(bs.position()); 3639 bs.position(0); 3640 us.limit(us.position()); 3641 us.position(0); 3642 3643 smBufDecode(decoder, "IMAP", bs, us); 3644 smBufEncode(encoder, "IMAP", us, bs); 3645 3646 //the rest of the code in this method is for better code coverage 3647 us.clear(); 3648 bs.clear(); 3649 3650 //start of charset encoder code coverage 3651 //test buffer overflow 3652 us.put((char)0x0026); us.put((char)0x17A9); 3653 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3654 3655 bs.limit(bs.position()); 3656 bs.position(0); 3657 us.limit(us.position()); 3658 us.position(0); 3659 3660 try { 3661 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3662 errln("Exception while encoding IMAP (1) should have been thrown."); 3663 } catch(Exception ex) { 3664 } 3665 3666 us.clear(); 3667 bs.clear(); 3668 3669 //test buffer overflow 3670 us.put((char)0x17A9); us.put((char)0x0941); 3671 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3672 3673 bs.limit(bs.position()); 3674 bs.position(0); 3675 us.limit(us.position()); 3676 us.position(0); 3677 3678 try { 3679 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3680 errln("Exception while encoding IMAP (2) should have been thrown."); 3681 } catch(Exception ex) { 3682 } 3683 3684 us.clear(); 3685 bs.clear(); 3686 3687 //test buffer overflow 3688 us.put((char)0x17A9); us.put((char)0x0941); 3689 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3690 3691 bs.limit(bs.position()); 3692 bs.position(0); 3693 us.limit(us.position()); 3694 us.position(0); 3695 3696 try { 3697 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3698 errln("Exception while encoding IMAP (3) should have been thrown."); 3699 } catch(Exception ex) { 3700 } 3701 3702 us.clear(); 3703 bs.clear(); 3704 3705 //test buffer overflow 3706 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3707 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3708 bs.put((byte)0x00); 3709 3710 bs.limit(bs.position()); 3711 bs.position(0); 3712 us.limit(us.position()); 3713 us.position(0); 3714 3715 try { 3716 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3717 errln("Exception while encoding IMAP (4) should have been thrown."); 3718 } catch(Exception ex) { 3719 } 3720 3721 us.clear(); 3722 bs.clear(); 3723 3724 //test buffer overflow 3725 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3726 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3727 bs.put((byte)0x00); bs.put((byte)0x00); 3728 3729 bs.limit(bs.position()); 3730 bs.position(0); 3731 us.limit(us.position()); 3732 us.position(0); 3733 3734 try { 3735 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3736 errln("Exception while encoding IMAP (5) should have been thrown."); 3737 } catch(Exception ex) { 3738 } 3739 3740 us.clear(); 3741 bs.clear(); 3742 3743 //test buffer overflow 3744 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3745 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3746 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3747 3748 bs.limit(bs.position()); 3749 bs.position(0); 3750 us.limit(us.position()); 3751 us.position(0); 3752 3753 try { 3754 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3755 errln("Exception while encoding IMAP (6) should have been thrown."); 3756 } catch(Exception ex) { 3757 } 3758 3759 us.clear(); 3760 bs.clear(); 3761 3762 //test buffer overflow 3763 us.put((char)0x17A9); us.put((char)0x0941); 3764 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3765 bs.put((byte)0x00); 3766 3767 bs.limit(bs.position()); 3768 bs.position(0); 3769 us.limit(us.position()); 3770 us.position(0); 3771 3772 try { 3773 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3774 errln("Exception while encoding IMAP (7) should have been thrown."); 3775 } catch(Exception ex) { 3776 } 3777 3778 us.clear(); 3779 bs.clear(); 3780 3781 //test flushing 3782 us.put((char)0x17A9); us.put((char)0x0941); 3783 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3784 bs.put((byte)0x51); bs.put((byte)0x2d); 3785 3786 bs.limit(bs.position()); 3787 bs.position(0); 3788 us.limit(us.position()); 3789 us.position(0); 3790 3791 try { 3792 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3793 } catch(Exception ex) { 3794 errln("Exception while encoding IMAP (8) should not have been thrown."); 3795 } 3796 3797 us = CharBuffer.allocate(0x08); 3798 bs = ByteBuffer.allocate(0x08); 3799 3800 //test flushing buffer overflow 3801 us.put((char)0x0061); 3802 bs.put((byte)0x61); bs.put((byte)0x00); 3803 3804 bs.limit(bs.position()); 3805 bs.position(0); 3806 us.limit(us.position()); 3807 us.position(0); 3808 3809 try { 3810 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3811 } catch(Exception ex) { 3812 errln("Exception while encoding IMAP (9) should not have been thrown."); 3813 } 3814 //end of charset encoder code coverage 3815 3816 us = CharBuffer.allocate(0x10); 3817 bs = ByteBuffer.allocate(0x10); 3818 3819 //start of charset decoder code coverage 3820 //test malform case 2 3821 us.put((char)0x0000); us.put((char)0x0000); 3822 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3823 3824 bs.limit(bs.position()); 3825 bs.position(0); 3826 us.limit(us.position()); 3827 us.position(0); 3828 3829 try { 3830 smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3831 errln("Exception while decoding IMAP (1) should have been thrown."); 3832 } catch(Exception ex) { 3833 } 3834 3835 us.clear(); 3836 bs.clear(); 3837 3838 //test malform case 5 3839 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3840 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3841 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3842 3843 bs.limit(bs.position()); 3844 bs.position(0); 3845 us.limit(us.position()); 3846 us.position(0); 3847 3848 try { 3849 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3850 errln("Exception while decoding IMAP (2) should have been thrown."); 3851 } catch(Exception ex) { 3852 } 3853 3854 us.clear(); 3855 bs.clear(); 3856 3857 //test malform case 7 3858 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3859 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3860 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3861 bs.put((byte)0x41); 3862 3863 bs.limit(bs.position()); 3864 bs.position(0); 3865 us.limit(us.position()); 3866 us.position(0); 3867 3868 try { 3869 smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3870 errln("Exception while decoding IMAP (3) should have been thrown."); 3871 } catch(Exception ex) { 3872 } 3873 //end of charset decoder coder coverage 3874 } 3875 3876 //Test for charset UTF32LE to provide better code coverage 3877 @Test TestCharsetUTF32LE()3878 public void TestCharsetUTF32LE() { 3879 CoderResult result = CoderResult.UNDERFLOW; 3880 CharsetProvider provider = new CharsetProviderICU(); 3881 Charset cs = provider.charsetForName("UTF-32LE"); 3882 CharsetEncoder encoder = cs.newEncoder(); 3883 //CharsetDecoder decoder = cs.newDecoder(); 3884 3885 CharBuffer us = CharBuffer.allocate(0x10); 3886 ByteBuffer bs = ByteBuffer.allocate(0x10); 3887 3888 3889 //test malform surrogate 3890 us.put((char)0xD901); 3891 bs.put((byte)0x00); 3892 3893 bs.limit(bs.position()); 3894 bs.position(0); 3895 us.limit(us.position()); 3896 us.position(0); 3897 3898 try { 3899 smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false); 3900 errln("Exception while encoding UTF32LE (1) should have been thrown."); 3901 } catch (Exception ex) { 3902 } 3903 3904 bs.clear(); 3905 us.clear(); 3906 3907 //test malform surrogate 3908 us.put((char)0xD901); us.put((char)0xD902); 3909 bs.put((byte)0x00); 3910 3911 bs.limit(bs.position()); 3912 bs.position(0); 3913 us.limit(us.position()); 3914 us.position(0); 3915 3916 result = encoder.encode(us, bs, true); 3917 3918 if (!result.isError() && !result.isOverflow()) { 3919 errln("Error while encoding UTF32LE (2) should have occurred."); 3920 } 3921 3922 bs.clear(); 3923 us.clear(); 3924 3925 //test overflow trail surrogate 3926 us.put((char)0xDD01); us.put((char)0xDD0E); us.put((char)0xDD0E); 3927 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3928 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3929 3930 bs.limit(bs.position()); 3931 bs.position(0); 3932 us.limit(us.position()); 3933 us.position(0); 3934 3935 result = encoder.encode(us, bs, true); 3936 3937 if (!result.isError() && !result.isOverflow()) { 3938 errln("Error while encoding UTF32LE (3) should have occurred."); 3939 } 3940 3941 bs.clear(); 3942 us.clear(); 3943 3944 //test malform lead surrogate 3945 us.put((char)0xD90D); us.put((char)0xD90E); 3946 bs.put((byte)0x00); 3947 3948 bs.limit(bs.position()); 3949 bs.position(0); 3950 us.limit(us.position()); 3951 us.position(0); 3952 3953 try { 3954 smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false); 3955 errln("Exception while encoding UTF32LE (4) should have been thrown."); 3956 } catch (Exception ex) { 3957 } 3958 3959 bs.clear(); 3960 us.clear(); 3961 3962 //test overflow buffer 3963 us.put((char)0x0061); 3964 bs.put((byte)0x00); 3965 3966 bs.limit(bs.position()); 3967 bs.position(0); 3968 us.limit(us.position()); 3969 us.position(0); 3970 3971 try { 3972 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 3973 errln("Exception while encoding UTF32LE (5) should have been thrown."); 3974 } catch (Exception ex) { 3975 } 3976 3977 bs.clear(); 3978 us.clear(); 3979 3980 //test malform trail surrogate 3981 us.put((char)0xDD01); 3982 bs.put((byte)0x00); 3983 3984 bs.limit(bs.position()); 3985 bs.position(0); 3986 us.limit(us.position()); 3987 us.position(0); 3988 3989 try { 3990 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 3991 errln("Exception while encoding UTF32LE (6) should have been thrown."); 3992 } catch (Exception ex) { 3993 } 3994 } 3995 3996 //Test for charset UTF16LE to provide better code coverage 3997 @Test TestCharsetUTF16LE()3998 public void TestCharsetUTF16LE() { 3999 CoderResult result = CoderResult.UNDERFLOW; 4000 CharsetProvider provider = new CharsetProviderICU(); 4001 Charset cs = provider.charsetForName("UTF-16LE"); 4002 CharsetEncoder encoder = cs.newEncoder(); 4003 //CharsetDecoder decoder = cs.newDecoder(); 4004 4005 // Test for malform and change fromUChar32 for next call 4006 char u_pts1[] = { 4007 (char)0xD805, 4008 (char)0xDC01, (char)0xDC02, (char)0xDC03, 4009 (char)0xD901, (char)0xD902 4010 }; 4011 byte b_pts1[] = { 4012 (byte)0x00, 4013 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 4014 }; 4015 4016 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4017 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4018 4019 us.put(u_pts1); 4020 bs.put(b_pts1); 4021 4022 us.limit(1); 4023 us.position(0); 4024 bs.limit(1); 4025 bs.position(0); 4026 4027 result = encoder.encode(us, bs, true); 4028 4029 if (!result.isMalformed()) { 4030 // LE should not output BOM, so this should be malformed 4031 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4032 } 4033 4034 // Test for malform surrogate from previous buffer 4035 us.limit(4); 4036 us.position(1); 4037 bs.limit(7); 4038 bs.position(1); 4039 4040 result = encoder.encode(us, bs, true); 4041 4042 if (!result.isMalformed()) { 4043 errln("Error while encoding UTF-16LE (2) should have occured."); 4044 } 4045 4046 // Test for malform trail surrogate 4047 encoder.reset(); 4048 4049 us.limit(1); 4050 us.position(0); 4051 bs.limit(1); 4052 bs.position(0); 4053 4054 result = encoder.encode(us, bs, true); 4055 4056 us.limit(6); 4057 us.position(4); 4058 bs.limit(4); 4059 bs.position(1); 4060 4061 result = encoder.encode(us, bs, true); 4062 4063 if (!result.isMalformed()) { 4064 errln("Error while encoding UTF-16LE (3) should have occured."); 4065 } 4066 } 4067 4068 //provide better code coverage for the generic charset UTF32 4069 @Test TestCharsetUTF32()4070 public void TestCharsetUTF32() { 4071 CoderResult result = CoderResult.UNDERFLOW; 4072 CharsetProvider provider = new CharsetProviderICU(); 4073 Charset cs = provider.charsetForName("UTF-32"); 4074 CharsetDecoder decoder = cs.newDecoder(); 4075 CharsetEncoder encoder = cs.newEncoder(); 4076 4077 //start of decoding code coverage 4078 char us_array[] = { 4079 0x0000, 0x0000, 0x0000, 0x0000, 4080 }; 4081 4082 byte bs_array1[] = { 4083 (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF, 4084 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4085 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4086 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4087 }; 4088 4089 byte bs_array2[] = { 4090 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4091 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4092 }; 4093 4094 CharBuffer us = CharBuffer.allocate(us_array.length); 4095 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4096 4097 us.put(us_array); 4098 bs.put(bs_array1); 4099 4100 us.limit(us.position()); 4101 us.position(0); 4102 bs.limit(bs.position()); 4103 bs.position(0); 4104 4105 try { 4106 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4107 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4108 } catch (Exception ex) { 4109 } 4110 4111 decoder = cs.newDecoder(); 4112 4113 bs = ByteBuffer.allocate(bs_array2.length); 4114 bs.put(bs_array2); 4115 4116 us.limit(4); 4117 us.position(0); 4118 bs.limit(bs.position()); 4119 bs.position(0); 4120 4121 try { 4122 smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false); 4123 } catch (Exception ex) { 4124 // should recognize little endian BOM 4125 errln("Exception while decoding UTF32 charset (2) should not have been thrown."); 4126 } 4127 4128 //Test malform exception 4129 bs.clear(); 4130 us.clear(); 4131 4132 bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00); 4133 us.put((char)0x0000); 4134 4135 us.limit(us.position()); 4136 us.position(0); 4137 bs.limit(bs.position()); 4138 bs.position(0); 4139 4140 try { 4141 smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false); 4142 errln("Malform exception while decoding UTF32 charset (3) should have been thrown."); 4143 } catch (Exception ex) { 4144 } 4145 4146 //Test BOM testing 4147 bs.clear(); 4148 us.clear(); 4149 4150 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 4151 us.put((char)0x0000); 4152 4153 us.limit(us.position()); 4154 us.position(0); 4155 bs.limit(bs.position()); 4156 bs.position(0); 4157 4158 try { 4159 smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false); 4160 } catch (Exception ex) { 4161 // should recognize big endian BOM 4162 errln("Exception while decoding UTF32 charset (4) should not have been thrown."); 4163 } 4164 //end of decoding code coverage 4165 4166 //start of encoding code coverage 4167 us = CharBuffer.allocate(0x10); 4168 bs = ByteBuffer.allocate(0x10); 4169 4170 //test wite BOM overflow error 4171 us.put((char)0xDC01); 4172 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4173 4174 us.limit(us.position()); 4175 us.position(0); 4176 bs.limit(bs.position()); 4177 bs.position(0); 4178 4179 result = encoder.encode(us, bs, true); 4180 // must try to output BOM first for UTF-32 (not UTF-32BE or UTF-32LE) 4181 if (!result.isOverflow()) { 4182 errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred."); 4183 } 4184 4185 us.clear(); 4186 bs.clear(); 4187 4188 //test malform surrogate and store value in fromChar32 4189 us.put((char)0xD801); us.put((char)0xD802); 4190 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4191 4192 us.limit(us.position()); 4193 us.position(0); 4194 bs.limit(bs.position()); 4195 bs.position(0); 4196 4197 result = encoder.encode(us, bs, true); 4198 if (!result.isMalformed()) { 4199 errln("Malformed error while encoding UTF32 charset (2) should have occurred."); 4200 } 4201 4202 us.clear(); 4203 bs.clear(); 4204 4205 //test malform surrogate 4206 us.put((char)0x0000); us.put((char)0xD902); 4207 4208 us.limit(us.position()); 4209 us.position(0); 4210 bs.limit(bs.position()); 4211 bs.position(0); 4212 4213 result = encoder.encode(us, bs, true); 4214 if (!result.isOverflow()) { 4215 errln("Overflow error while encoding UTF32 charset (3) should have occurred."); 4216 } 4217 4218 us.clear(); 4219 bs.clear(); 4220 4221 //test malform surrogate 4222 encoder.reset(); 4223 us.put((char)0xD801); 4224 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4225 4226 us.limit(us.position()); 4227 us.position(0); 4228 bs.limit(bs.position()); 4229 bs.position(0); 4230 4231 result = encoder.encode(us, bs, true); 4232 if (!result.isMalformed()) { 4233 errln("Malform error while encoding UTF32 charset (4) should have occurred."); 4234 } 4235 4236 us.clear(); 4237 bs.clear(); 4238 4239 //test overflow surrogate 4240 us.put((char)0x0000); us.put((char)0xDDE1); us.put((char)0xD915); us.put((char)0xDDF2); 4241 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4242 4243 us.limit(us.position()); 4244 us.position(0); 4245 bs.limit(bs.position()); 4246 bs.position(0); 4247 4248 result = encoder.encode(us, bs, true); 4249 if (!result.isOverflow()) { 4250 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4251 } 4252 4253 us.clear(); 4254 bs.clear(); 4255 4256 //test malform surrogate 4257 encoder.reset(); 4258 us.put((char)0xDDE1); 4259 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4260 4261 us.limit(us.position()); 4262 us.position(0); 4263 bs.limit(bs.position()); 4264 bs.position(0); 4265 4266 result = encoder.encode(us, bs, true); 4267 if (!result.isMalformed()) { 4268 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4269 } 4270 //end of encoding code coverage 4271 } 4272 4273 //this method provides better code coverage decoding UTF32 LE/BE 4274 @Test TestDecodeUTF32LEBE()4275 public void TestDecodeUTF32LEBE() { 4276 CoderResult result = CoderResult.UNDERFLOW; 4277 CharsetProvider provider = new CharsetProviderICU(); 4278 CharsetDecoder decoder; 4279 CharBuffer us = CharBuffer.allocate(0x10); 4280 ByteBuffer bs = ByteBuffer.allocate(0x10); 4281 4282 //decode UTF32LE 4283 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4284 //test overflow buffer 4285 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4286 us.put((char)0x0000); 4287 4288 us.limit(us.position()); 4289 us.position(0); 4290 bs.limit(bs.position()); 4291 bs.position(0); 4292 4293 try { 4294 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4295 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4296 } catch (Exception ex) { 4297 } 4298 // test overflow buffer handling in CharsetDecoderICU 4299 bs.position(0); 4300 us.position(0); 4301 decoder.reset(); 4302 result = decoder.decode(bs, us, true); 4303 if (result.isOverflow()) { 4304 result = decoder.decode(bs, us, true); 4305 if (!result.isOverflow()) { 4306 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4307 } 4308 } else { 4309 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4310 } 4311 4312 us.clear(); 4313 bs.clear(); 4314 //test malform buffer 4315 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4316 us.put((char)0x0000); 4317 4318 us.limit(us.position()); 4319 us.position(0); 4320 bs.limit(bs.position()); 4321 bs.position(0); 4322 4323 try { 4324 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4325 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4326 } catch (Exception ex) { 4327 } 4328 4329 us.clear(); 4330 bs.clear(); 4331 //test malform buffer 4332 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4333 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4334 us.put((char)0x0000); 4335 4336 us.limit(us.position()); 4337 us.position(0); 4338 bs.limit(bs.position()); 4339 bs.position(0); 4340 4341 try { 4342 // must flush in order to exhibit malformed behavior 4343 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4344 errln("Malform exception while decoding UTF32LE (3) should have been thrown."); 4345 } catch (Exception ex) { 4346 } 4347 4348 us.clear(); 4349 bs.clear(); 4350 //test malform buffer 4351 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4352 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4353 us.put((char)0x0000); 4354 4355 us.limit(us.position()); 4356 us.position(0); 4357 bs.limit(bs.position()); 4358 bs.position(0); 4359 4360 try { 4361 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4362 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4363 } catch (Exception ex) { 4364 } 4365 4366 us.clear(); 4367 bs.clear(); 4368 //test overflow buffer 4369 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4370 bs.put((byte)0xDD); bs.put((byte)0xFF); bs.put((byte)0x10); bs.put((byte)0x00); 4371 us.put((char)0x0000); 4372 4373 us.limit(us.position()); 4374 us.position(0); 4375 bs.limit(bs.position()); 4376 bs.position(0); 4377 4378 try { 4379 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4380 errln("Overflow exception while decoding UTF32LE (5) should have been thrown."); 4381 } catch (Exception ex) { 4382 } 4383 //end of decode UTF32LE 4384 4385 bs.clear(); 4386 us.clear(); 4387 4388 //decode UTF32BE 4389 decoder = provider.charsetForName("UTF-32BE").newDecoder(); 4390 //test overflow buffer 4391 bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41); 4392 us.put((char)0x0000); 4393 4394 us.limit(us.position()); 4395 us.position(0); 4396 bs.limit(bs.position()); 4397 bs.position(0); 4398 4399 try { 4400 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4401 errln("Overflow exception while decoding UTF32BE (1) should have been thrown."); 4402 } catch (Exception ex) { 4403 } 4404 4405 bs.clear(); 4406 us.clear(); 4407 //test malform buffer 4408 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02); 4409 us.put((char)0x0000); 4410 4411 us.limit(us.position()); 4412 us.position(0); 4413 bs.limit(bs.position()); 4414 bs.position(0); 4415 4416 try { 4417 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4418 errln("Malform exception while decoding UTF32BE (2) should have been thrown."); 4419 } catch (Exception ex) { 4420 } 4421 4422 bs.clear(); 4423 us.clear(); 4424 //test malform buffer 4425 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4426 bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF); 4427 us.put((char)0x0000); 4428 4429 us.limit(us.position()); 4430 us.position(0); 4431 bs.limit(bs.position()); 4432 bs.position(0); 4433 4434 try { 4435 // must flush to exhibit malformed behavior 4436 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4437 errln("Malform exception while decoding UTF32BE (3) should have been thrown."); 4438 } catch (Exception ex) { 4439 } 4440 4441 bs.clear(); 4442 us.clear(); 4443 //test overflow buffer 4444 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4445 bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD); 4446 us.put((char)0x0000); 4447 4448 us.limit(us.position()); 4449 us.position(0); 4450 bs.limit(bs.position()); 4451 bs.position(0); 4452 4453 try { 4454 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4455 errln("Overflow exception while decoding UTF32BE (4) should have been thrown."); 4456 } catch (Exception ex) { 4457 } 4458 4459 bs.clear(); 4460 us.clear(); 4461 //test malform buffer 4462 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); 4463 us.put((char)0x0000); 4464 4465 us.limit(us.position()); 4466 us.position(0); 4467 bs.limit(bs.position()); 4468 bs.position(0); 4469 4470 try { 4471 // must flush to exhibit malformed behavior 4472 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4473 errln("Malform exception while decoding UTF32BE (5) should have been thrown."); 4474 } catch (Exception ex) { 4475 } 4476 //end of decode UTF32BE 4477 } 4478 4479 //provide better code coverage for UTF8 4480 @Test TestCharsetUTF8()4481 public void TestCharsetUTF8() { 4482 CoderResult result = CoderResult.UNDERFLOW; 4483 CharsetProvider provider = new CharsetProviderICU(); 4484 CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder(); 4485 CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder(); 4486 4487 CharBuffer us = CharBuffer.allocate(0x10); 4488 ByteBuffer bs = ByteBuffer.allocate(0x10); 4489 ByteBuffer bs2; 4490 CharBuffer us2; 4491 int limit_us; 4492 int limit_bs; 4493 4494 //encode and decode using read only buffer 4495 encoder.reset(); 4496 decoder.reset(); 4497 us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902); 4498 bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5); 4499 bs.put((byte)0x00); 4500 limit_us = us.position(); 4501 limit_bs = bs.position(); 4502 4503 us.limit(limit_us); 4504 us.position(0); 4505 bs.limit(limit_bs); 4506 bs.position(0); 4507 bs2 = bs.asReadOnlyBuffer(); 4508 us2 = us.asReadOnlyBuffer(); 4509 4510 result = decoder.decode(bs2, us, true); 4511 if (!result.isUnderflow() || !equals(us, us2)) { 4512 errln("Error while decoding UTF-8 (1) should not have occured."); 4513 } 4514 4515 us2.limit(limit_us); 4516 us2.position(0); 4517 bs.limit(limit_bs); 4518 bs.position(0); 4519 4520 result = encoder.encode(us2, bs, true); 4521 if (!result.isUnderflow() || !equals(bs, bs2)) { 4522 errln("Error while encoding UTF-8 (1) should not have occured."); 4523 } 4524 4525 us.clear(); 4526 bs.clear(); 4527 4528 //test overflow buffer while encoding 4529 //readonly buffer 4530 encoder.reset(); 4531 us.put((char)0x0081); us.put((char)0xEF65); 4532 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4533 limit_us = us.position(); 4534 us2 = us.asReadOnlyBuffer(); 4535 us2.limit(limit_us); 4536 us2.position(0); 4537 bs.limit(1); 4538 bs.position(0); 4539 result = encoder.encode(us2, bs, true); 4540 if (!result.isOverflow()) { 4541 errln("Overflow Error should have occured while encoding UTF-8 (2)."); 4542 } 4543 4544 encoder.reset(); 4545 4546 us2.limit(limit_us); 4547 us2.position(1); 4548 bs.limit(1); 4549 bs.position(0); 4550 result = encoder.encode(us2, bs, true); 4551 if (!result.isOverflow()) { 4552 errln("Overflow Error should have occured while encoding UTF-8 (3)."); 4553 } 4554 4555 encoder.reset(); 4556 4557 us2.limit(limit_us); 4558 us2.position(1); 4559 bs.limit(2); 4560 bs.position(0); 4561 result = encoder.encode(us2, bs, true); 4562 if (!result.isOverflow()) { 4563 errln("Overflow Error should have occured while encoding UTF-8 (4)."); 4564 } 4565 4566 encoder.reset(); 4567 4568 us2.limit(limit_us); 4569 us2.position(0); 4570 bs.limit(2); 4571 bs.position(0); 4572 result = encoder.encode(us2, bs, true); 4573 if (!result.isOverflow()) { 4574 errln("Overflow Error should have occured while encoding UTF-8 (5)."); 4575 } 4576 4577 //not readonly buffer 4578 encoder.reset(); 4579 4580 us.limit(limit_us); 4581 us.position(0); 4582 bs.limit(1); 4583 bs.position(0); 4584 result = encoder.encode(us, bs, true); 4585 if (!result.isOverflow()) { 4586 errln("Overflow Error should have occured while encoding UTF-8 (6)."); 4587 } 4588 4589 encoder.reset(); 4590 4591 us.limit(limit_us); 4592 us.position(0); 4593 bs.limit(3); 4594 bs.position(0); 4595 result = encoder.encode(us, bs, true); 4596 if (!result.isOverflow()) { 4597 errln("Overflow Error should have occured while encoding UTF-8 (7)."); 4598 } 4599 4600 encoder.reset(); 4601 4602 us.limit(limit_us); 4603 us.position(1); 4604 bs.limit(2); 4605 bs.position(0); 4606 result = encoder.encode(us, bs, true); 4607 if (!result.isOverflow()) { 4608 errln("Overflow Error should have occured while encoding UTF-8 (8)."); 4609 } 4610 4611 encoder.reset(); 4612 4613 us.limit(limit_us + 1); 4614 us.position(1); 4615 bs.limit(3); 4616 bs.position(0); 4617 result = encoder.encode(us, bs, true); 4618 if (!result.isOverflow()) { 4619 errln("Overflow Error should have occured while encoding UTF-8 (9)."); 4620 } 4621 4622 us.clear(); 4623 bs.clear(); 4624 4625 //test encoding 4 byte characters 4626 encoder.reset(); 4627 us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041); 4628 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4629 limit_us = us.position(); 4630 us2 = us.asReadOnlyBuffer(); 4631 us2.limit(limit_us); 4632 us2.position(0); 4633 bs.limit(1); 4634 bs.position(0); 4635 result = encoder.encode(us2, bs, true); 4636 if (!result.isOverflow()) { 4637 errln("Overflow Error should have occured while encoding UTF-8 (10)."); 4638 } 4639 4640 encoder.reset(); 4641 4642 us2.limit(limit_us); 4643 us2.position(0); 4644 bs.limit(2); 4645 bs.position(0); 4646 result = encoder.encode(us2, bs, true); 4647 if (!result.isOverflow()) { 4648 errln("Overflow Error should have occured while encoding UTF-8 (11)."); 4649 } 4650 4651 encoder.reset(); 4652 4653 us2.limit(limit_us); 4654 us2.position(0); 4655 bs.limit(3); 4656 bs.position(0); 4657 result = encoder.encode(us2, bs, true); 4658 if (!result.isOverflow()) { 4659 errln("Overflow Error should have occured while encoding UTF-8 (12)."); 4660 } 4661 4662 encoder.reset(); 4663 4664 us2.limit(limit_us); 4665 us2.position(0); 4666 bs.limit(4); 4667 bs.position(0); 4668 result = encoder.encode(us2, bs, true); 4669 if (!result.isOverflow()) { 4670 errln("Overflow Error should have occured while encoding UTF-8 (13)."); 4671 } 4672 4673 us.clear(); 4674 bs.clear(); 4675 4676 //decoding code coverage 4677 //test malform error 4678 decoder.reset(); 4679 bs.put((byte)0xC2); bs.put((byte)0xC2); 4680 us.put((char)0x0000); 4681 bs2 = bs.asReadOnlyBuffer(); 4682 4683 us.limit(1); 4684 us.position(0); 4685 bs2.limit(1); 4686 bs2.position(0); 4687 4688 result = decoder.decode(bs2, us, true); 4689 result = decoder.flush(us); 4690 if (!result.isMalformed()) { 4691 errln("Malform error should have occurred while decoding UTF-8 (1)."); 4692 } 4693 4694 us.limit(1); 4695 us.position(0); 4696 bs2.limit(1); 4697 bs2.position(0); 4698 4699 decoder.reset(); 4700 4701 result = decoder.decode(bs2, us, true); 4702 us.limit(1); 4703 us.position(0); 4704 bs2.limit(2); 4705 bs2.position(0); 4706 result = decoder.decode(bs2, us, true); 4707 if (!result.isMalformed()) { 4708 errln("Malform error should have occurred while decoding UTF-8 (2)."); 4709 } 4710 4711 us.clear(); 4712 bs.clear(); 4713 4714 //test overflow buffer 4715 bs.put((byte)0x01); bs.put((byte)0x41); 4716 us.put((char)0x0000); 4717 bs2 = bs.asReadOnlyBuffer(); 4718 us.limit(1); 4719 us.position(0); 4720 bs2.limit(2); 4721 bs2.position(0); 4722 4723 result = decoder.decode(bs2, us, true); 4724 if (!result.isOverflow()) { 4725 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4726 } 4727 4728 us.clear(); 4729 bs.clear(); 4730 4731 //test malform string 4732 decoder.reset(); 4733 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4734 us.put((char)0x0000); 4735 bs2 = bs.asReadOnlyBuffer(); 4736 us.limit(1); 4737 us.position(0); 4738 bs2.limit(4); 4739 bs2.position(0); 4740 4741 result = decoder.decode(bs2, us, true); 4742 if (!result.isMalformed()) { 4743 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4744 } 4745 4746 bs.clear(); 4747 4748 //test overflow 4749 decoder.reset(); 4750 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4751 bs2 = bs.asReadOnlyBuffer(); 4752 us.limit(1); 4753 us.position(0); 4754 bs2.limit(4); 4755 bs2.position(0); 4756 4757 result = decoder.decode(bs2, us, true); 4758 if (!result.isOverflow()) { 4759 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4760 } 4761 4762 //test overflow 4763 decoder.reset(); 4764 us.limit(2); 4765 us.position(0); 4766 bs2.limit(5); 4767 bs2.position(0); 4768 4769 result = decoder.decode(bs2, us, true); 4770 if (!result.isOverflow()) { 4771 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4772 } 4773 4774 //test overflow 4775 decoder.reset(); 4776 us.limit(1); 4777 us.position(0); 4778 bs.limit(5); 4779 bs.position(0); 4780 4781 result = decoder.decode(bs, us, true); 4782 if (!result.isOverflow()) { 4783 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4784 } 4785 4786 bs.clear(); 4787 4788 //test overflow 4789 decoder.reset(); 4790 bs.put((byte)0x41); bs.put((byte)0x42); 4791 us.limit(1); 4792 us.position(0); 4793 bs.limit(2); 4794 bs.position(0); 4795 4796 result = decoder.decode(bs, us, true); 4797 if (!result.isOverflow()) { 4798 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4799 } 4800 4801 } 4802 4803 //provide better code coverage for Charset UTF16 4804 @Test TestCharsetUTF16()4805 public void TestCharsetUTF16() { 4806 CoderResult result = CoderResult.UNDERFLOW; 4807 CharsetProvider provider = new CharsetProviderICU(); 4808 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4809 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4810 4811 CharBuffer us = CharBuffer.allocate(0x10); 4812 ByteBuffer bs = ByteBuffer.allocate(0x10); 4813 4814 //test flush buffer and malform string 4815 bs.put((byte)0xFF); 4816 us.put((char)0x0000); 4817 4818 us.limit(us.position()); 4819 us.position(0); 4820 bs.limit(bs.position()); 4821 bs.position(0); 4822 4823 result = decoder.decode(bs, us, true); 4824 result = decoder.flush(us); 4825 if (!result.isMalformed()) { 4826 errln("Malform error while decoding UTF-16 should have occurred."); 4827 } 4828 4829 us.clear(); 4830 bs.clear(); 4831 4832 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4833 4834 us.limit(1); 4835 us.position(0); 4836 bs.limit(4); 4837 bs.position(0); 4838 4839 result = encoder.encode(us, bs, true); 4840 us.limit(3); 4841 us.position(0); 4842 bs.limit(3); 4843 bs.position(0); 4844 result = encoder.encode(us, bs, true); 4845 if (!result.isOverflow()) { 4846 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4847 } 4848 4849 us.clear(); 4850 bs.clear(); 4851 4852 //test overflow buffer 4853 decoder.reset(); 4854 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4855 4856 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4857 4858 us.limit(0); 4859 us.position(0); 4860 bs.limit(3); 4861 bs.position(0); 4862 4863 result = decoder.decode(bs, us, true); 4864 if (!result.isOverflow()) { 4865 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4866 } 4867 } 4868 4869 //provide better code coverage for Charset ISO-2022-KR 4870 @Test TestCharsetISO2022KR()4871 public void TestCharsetISO2022KR() { 4872 CoderResult result = CoderResult.UNDERFLOW; 4873 CharsetProvider provider = new CharsetProviderICU(); 4874 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4875 4876 byte bytearray[] = { 4877 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4878 }; 4879 char chararray[] = { 4880 (char)0x0041 4881 }; 4882 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4883 CharBuffer cb = CharBuffer.wrap(chararray); 4884 4885 result = decoder.decode(bb, cb, true); 4886 4887 if (!result.isOverflow()) { 4888 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4889 } 4890 } 4891 4892 //provide better code coverage for Charset ISO-2022-JP 4893 @Test TestCharsetISO2022JP()4894 public void TestCharsetISO2022JP() { 4895 CoderResult result = CoderResult.UNDERFLOW; 4896 CharsetProvider provider = new CharsetProviderICU(); 4897 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4898 4899 byte bytearray[] = { 4900 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4901 }; 4902 char chararray[] = { 4903 (char)0x000A 4904 }; 4905 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4906 CharBuffer cb = CharBuffer.wrap(chararray); 4907 4908 result = decoder.decode(bb, cb, true); 4909 4910 if (!result.isOverflow()) { 4911 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4912 } 4913 } 4914 4915 //provide better code coverage for Charset ASCII 4916 @Test TestCharsetASCII()4917 public void TestCharsetASCII() { 4918 CoderResult result = CoderResult.UNDERFLOW; 4919 CharsetProvider provider = new CharsetProviderICU(); 4920 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4921 4922 byte bytearray[] = { 4923 (byte)0x41 4924 }; 4925 char chararray[] = { 4926 (char)0x0041 4927 }; 4928 4929 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4930 CharBuffer cb = CharBuffer.wrap(chararray); 4931 4932 result = decoder.decode(bb, cb, true); 4933 result = decoder.flush(cb); 4934 4935 if (result.isError()) { 4936 errln("Error occurred while decoding US-ASCII."); 4937 } 4938 } 4939 4940 // provide better code coverage for Charset Callbacks 4941 /* Different aspects of callbacks are being tested including using different context available */ 4942 @Test TestCharsetCallbacks()4943 public void TestCharsetCallbacks() { 4944 CoderResult result = CoderResult.UNDERFLOW; 4945 CharsetProvider provider = new CharsetProviderICU(); 4946 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4947 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4948 4949 String context3[] = { 4950 "i", 4951 "J" 4952 }; 4953 4954 // Testing encoder escape callback 4955 String context1[] = { 4956 "J", 4957 "C", 4958 "D", 4959 null 4960 }; 4961 char chararray[] = { 4962 (char)0xd122 4963 }; 4964 ByteBuffer bb = ByteBuffer.allocate(20); 4965 CharBuffer cb = CharBuffer.wrap(chararray); 4966 4967 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 4968 for (int i = 0; i < context1.length; i++) { 4969 encoder.reset(); 4970 cb.position(0); 4971 bb.position(0); 4972 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 4973 4974 result = encoder.encode(cb, bb, true); 4975 if (result.isError()) { 4976 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4977 } 4978 } 4979 4980 // Testing encoder skip callback 4981 for (int i = 0; i < context3.length; i++) { 4982 encoder.reset(); 4983 cb.position(0); 4984 bb.position(0); 4985 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 4986 4987 result = encoder.encode(cb, bb, true); 4988 if (result.isError() && i == 0) { 4989 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4990 } 4991 } 4992 4993 // Testing encoder sub callback 4994 for (int i = 0; i < context3.length; i++) { 4995 encoder.reset(); 4996 cb.position(0); 4997 bb.position(0); 4998 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 4999 5000 result = encoder.encode(cb, bb, true); 5001 if (result.isError() && i == 0) { 5002 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5003 } 5004 } 5005 5006 // Testing decoder escape callback 5007 String context2[] = { 5008 "X", 5009 "C", 5010 "D", 5011 null 5012 }; 5013 byte bytearray[] = { 5014 (byte)0x1b, (byte)0x2e, (byte)0x43 5015 }; 5016 bb = ByteBuffer.wrap(bytearray); 5017 cb = CharBuffer.allocate(20); 5018 5019 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 5020 for (int i = 0; i < context2.length; i++) { 5021 decoder.reset(); 5022 cb.position(0); 5023 bb.position(0); 5024 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 5025 5026 result = decoder.decode(bb, cb, true); 5027 if (result.isError()) { 5028 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5029 } 5030 } 5031 5032 // Testing decoder skip callback 5033 for (int i = 0; i < context3.length; i++) { 5034 decoder.reset(); 5035 cb.position(0); 5036 bb.position(0); 5037 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5038 result = decoder.decode(bb, cb, true); 5039 if (!result.isError()) { 5040 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5041 } 5042 } 5043 } 5044 5045 // Testing invalid input exceptions 5046 @Test TestInvalidInput()5047 public void TestInvalidInput() { 5048 CharsetProvider provider = new CharsetProviderICU(); 5049 Charset charset = provider.charsetForName("iso-2022-jp"); 5050 CharsetEncoder encoder = charset.newEncoder(); 5051 CharsetDecoder decoder = charset.newDecoder(); 5052 5053 try { 5054 encoder.encode(CharBuffer.allocate(10), null, true); 5055 errln("Illegal argument exception should have been thrown due to null target."); 5056 } catch (Exception ex) { 5057 } 5058 5059 try { 5060 decoder.decode(ByteBuffer.allocate(10), null, true); 5061 errln("Illegal argument exception should have been thrown due to null target."); 5062 } catch (Exception ex) { 5063 } 5064 } 5065 5066 // Test java canonical names 5067 @Test TestGetICUJavaCanonicalNames()5068 public void TestGetICUJavaCanonicalNames() { 5069 // Ambiguous charset name. 5070 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5071 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5072 if (javaCName == null || icuCName == null) { 5073 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5074 } 5075 5076 } 5077 5078 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5079 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 5080 @Test TestCharsetTestData()5081 public void TestCharsetTestData() { 5082 CoderResult result = CoderResult.UNDERFLOW; 5083 String charsetName = "test4"; 5084 CharsetProvider provider = new CharsetProviderICU(); 5085 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5086 this.getClass().getClassLoader()); 5087 CharsetEncoder encoder = charset.newEncoder(); 5088 CharsetDecoder decoder = charset.newDecoder(); 5089 5090 byte bytearray[] = { 5091 0x01, 0x02, 0x03, 0x0a, 5092 0x01, 0x02, 0x03, 0x0b, 5093 0x01, 0x02, 0x03, 0x0d, 5094 }; 5095 5096 // set the callback for overflow errors 5097 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5098 5099 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5100 CharBuffer cb = CharBuffer.allocate(10); 5101 5102 bb.limit(4); 5103 cb.limit(1); // Overflow should occur and is expected 5104 result = decoder.decode(bb, cb, false); 5105 if (result.isError()) { 5106 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5107 } 5108 5109 bb.limit(8); 5110 result = decoder.decode(bb, cb, false); 5111 if (result.isError()) { 5112 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5113 } 5114 5115 bb.limit(12); 5116 result = decoder.decode(bb, cb, true); 5117 if (result.isError()) { 5118 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5119 } 5120 5121 char chararray[] = { 5122 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5123 0xD940, /* first half of \U00060006 or \U00060007 */ 5124 0xDC07/* second half of \U00060007 */ 5125 }; 5126 5127 cb = CharBuffer.wrap(chararray); 5128 bb = ByteBuffer.allocate(10); 5129 5130 bb.limit(2); 5131 cb.limit(4); 5132 result = encoder.encode(cb, bb, false); 5133 if (result.isError()) { 5134 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5135 } 5136 cb.limit(5); 5137 result = encoder.encode(cb, bb, false); 5138 if (result.isError()) { 5139 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5140 } 5141 cb.limit(6); 5142 result = encoder.encode(cb, bb, true); 5143 if (!result.isError()) { 5144 errln("Error should have occurred while encoding: " + charsetName); 5145 } 5146 } 5147 5148 /* Round trip test of SCSU converter*/ 5149 @Test TestSCSUConverter()5150 public void TestSCSUConverter(){ 5151 byte allFeaturesSCSU[]={ 5152 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5153 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5154 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5155 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5156 }; 5157 5158 char allFeaturesUTF16[]={ 5159 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5160 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5161 0x01df, 0xf000, 0xdbff, 0xdfff 5162 }; 5163 5164 5165 char germanUTF16[]={ 5166 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5167 }; 5168 5169 byte germanSCSU[]={ 5170 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5171 }; 5172 5173 char russianUTF16[]={ 5174 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5175 }; 5176 5177 byte russianSCSU[]={ 5178 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5179 }; 5180 5181 char japaneseUTF16[]={ 5182 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5183 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5184 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5185 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5186 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5187 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5188 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 5189 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 5190 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5191 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5192 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5193 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5194 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5195 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5196 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5197 }; 5198 5199 // SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 5200 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5201 byte japaneseSCSU[]={ 5202 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5203 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5204 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5205 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5206 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5207 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5208 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5209 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 0xaa, 5210 (byte)0x8f, 0x0e, 0x61, 0x1b, (byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5211 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5212 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5213 (byte)0xcb, (byte)0x82 5214 }; 5215 5216 CharsetProviderICU cs = new CharsetProviderICU(); 5217 CharsetICU charset = (CharsetICU)cs.charsetForName("scsu"); 5218 CharsetDecoder decode = charset.newDecoder(); 5219 CharsetEncoder encode = charset.newEncoder(); 5220 5221 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5222 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5223 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5224 5225 for(int i=0;i<4;i++){ 5226 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5227 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5228 5229 try{ 5230 // Decoding 5231 CharBuffer decoderResult = decode.decode(decoderBuffer); 5232 encoderBuffer.position(0); 5233 if(!decoderResult.equals(encoderBuffer)){ 5234 errln("Error occured while decoding "+ charset.name()); 5235 } 5236 // Encoding 5237 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5238 // RoundTrip Test 5239 ByteBuffer roundTrip = encoderResult; 5240 CharBuffer roundTripResult = decode.decode(roundTrip); 5241 encoderBuffer.position(0); 5242 if(!roundTripResult.equals(encoderBuffer)){ 5243 errln("Error occured while encoding "+ charset.name()); 5244 } 5245 // Test overflow for code coverage reasons 5246 if (i == 0) { 5247 ByteBuffer test = encoderResult; 5248 test.position(0); 5249 CharBuffer smallBuffer = CharBuffer.allocate(11); 5250 decode.reset(); 5251 CoderResult status = decode.decode(test, smallBuffer, true); 5252 if (status != CoderResult.OVERFLOW) { 5253 errln("Overflow buffer error should have been thrown."); 5254 } 5255 } 5256 }catch(Exception e){ 5257 errln("Exception while converting SCSU thrown: " + e); 5258 } 5259 } 5260 5261 /* Provide better code coverage */ 5262 /* testing illegal codepoints */ 5263 CoderResult illegalResult = CoderResult.UNDERFLOW; 5264 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5265 5266 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5267 decode.reset(); 5268 illegalResult = decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5269 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5270 errln("Malformed error should have been returned for decoder " + charset.name()); 5271 } 5272 /* code coverage test from nucnvtst.c in ICU4C */ 5273 CoderResult ccResult = CoderResult.UNDERFLOW; 5274 int CCBufSize = 120 * 10; 5275 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5276 CharBuffer test = CharBuffer.allocate(CCBufSize); 5277 String [] ccSrc = { 5278 "\ud800\udc00", /* smallest surrogate*/ 5279 "\ud8ff\udcff", 5280 "\udBff\udFff", /* largest surrogate pair*/ 5281 "\ud834\udc00", 5282 //"\U0010FFFF", 5283 "Hello \u9292 \u9192 World!", 5284 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5285 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5286 5287 "\u0648\u06c8", /* catch missing reset*/ 5288 "\u0648\u06c8", 5289 5290 "\u4444\uE001", /* lowest quotable*/ 5291 "\u4444\uf2FF", /* highest quotable*/ 5292 "\u4444\uf188\u4444", 5293 "\u4444\uf188\uf288", 5294 "\u4444\uf188abc\u0429\uf288", 5295 "\u9292\u2222", 5296 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5297 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5298 "Hello World!123456", 5299 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5300 5301 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5302 "abc\u4411d", /* uses SQU*/ 5303 "abc\u4411\u4412d",/* uses SCU*/ 5304 "abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5305 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5306 "\u9292\u2222", 5307 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5308 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5309 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5310 5311 "", /* empty input*/ 5312 "\u0000", /* smallest BMP character*/ 5313 "\uFFFF", /* largest BMP character*/ 5314 5315 /* regression tests*/ 5316 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5317 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5318 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5319 "\u0041\u00df\u0401\u015f", 5320 "\u9066\u2123abc", 5321 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5322 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5323 }; 5324 for (int i = 0; i < ccSrc.length; i++) { 5325 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5326 encode.reset(); 5327 decode.reset(); 5328 trgt.clear(); 5329 test.clear(); 5330 ccResult = encode.encode(ubuf, trgt, true); 5331 if (ccResult.isError()) { 5332 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5333 } else { 5334 trgt.limit(trgt.position()); 5335 trgt.position(0); 5336 ccResult = decode.decode(trgt, test, true); 5337 if (ccResult.isError()) { 5338 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5339 } else { 5340 ubuf.position(0); 5341 test.limit(test.position()); 5342 test.position(0); 5343 if (!equals(test, ubuf)) { 5344 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5345 } 5346 } 5347 } 5348 } 5349 5350 /* Monkey test */ 5351 { 5352 char[] monkeyIn = { 5353 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5354 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5355 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 5356 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5357 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5358 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5359 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5360 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5361 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5362 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5363 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5364 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5365 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5366 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5367 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5368 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5369 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5370 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5371 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5372 /* test non-BMP code points */ 5373 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5374 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5375 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5376 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5377 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5378 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5379 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 5380 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5381 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5382 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5383 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5384 5385 5386 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5387 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5388 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5389 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5390 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5391 }; 5392 encode.reset(); 5393 decode.reset(); 5394 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5395 try { 5396 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5397 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5398 5399 } catch (Exception ex) { 5400 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5401 } 5402 } 5403 // Test malformed 5404 { 5405 char[] malformedSequence = { 5406 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5407 }; 5408 encode.reset(); 5409 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5410 5411 try { 5412 encode.encode(malformedSrc); 5413 errln("Malformed error should have thrown an exception."); 5414 } catch (Exception ex) { 5415 } 5416 } 5417 // Test overflow buffer 5418 { 5419 ByteBuffer overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5420 int sizes[] = { 8, 2, 11 }; 5421 for (int i = 0; i < sizes.length; i++) { 5422 try { 5423 decode.reset(); 5424 overflowTest.position(0); 5425 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5426 errln("Buffer overflow exception should have been thrown."); 5427 } catch (BufferOverflowException ex) { 5428 } catch (Exception ex) { 5429 errln("Buffer overflow exception should have been thrown."); 5430 } 5431 } 5432 5433 } 5434 } 5435 5436 /* Test for BOCU1 converter*/ 5437 @Test TestBOCU1Converter()5438 public void TestBOCU1Converter(){ 5439 char expected[]={ 5440 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5441 0x0063, 0x0061, 0x000D, 0x000A, 5442 5443 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5444 0x0930, 0x0020, 0x0918, 0x0909, 5445 5446 0x3086, 0x304D, 0x0020, 0x3053, // 16 5447 0x4000, 0x4E00, 0x7777, 0x0020, 5448 5449 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5450 0x0020, 0xD7A3, 0xDC00, 0xD800, 5451 5452 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5453 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5454 5455 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5456 0x0009 5457 }; 5458 5459 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5460 (byte) 0xFB, 5461 (byte) 0xEE, 5462 0x28, // from source offset 0 5463 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5464 (byte) 0xB3, 5465 (byte) 0xB1, 5466 0x0D, 5467 0x0A, 5468 5469 0x20, // from 8 5470 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5471 0x20, 0x68, 5472 0x59, 5473 5474 (byte) 0xF9, 5475 0x28, // from 16 5476 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5477 (byte) 0xD0, 0x33, 0x20, 5478 5479 (byte) 0xFA, 5480 (byte) 0x83, // from 24 5481 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5482 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5483 5484 (byte) 0xF9, 5485 (byte) 0xA2, // from 32 5486 (byte) 0xFC, 0x10, 0x3E, (byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5487 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5488 5489 0x01, /// from 41 5490 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5491 }; 5492 5493 CharsetProviderICU cs = new CharsetProviderICU(); 5494 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5495 CharsetDecoder decode = charset.newDecoder(); 5496 CharsetEncoder encode = charset.newEncoder(); 5497 5498 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5499 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5500 try{ 5501 // Decoding 5502 CharBuffer decoderResult = decode.decode(decoderBuffer); 5503 5504 encoderBuffer.position(0); 5505 if(!decoderResult.equals(encoderBuffer)){ 5506 errln("Error occured while decoding "+ charset.name()); 5507 } 5508 // Encoding 5509 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5510 // RoundTrip Test 5511 ByteBuffer roundTrip = encoderResult; 5512 CharBuffer roundTripResult = decode.decode(roundTrip); 5513 5514 encoderBuffer.position(0); 5515 if(!roundTripResult.equals(encoderBuffer)){ 5516 errln("Error occured while encoding "+ charset.name()); 5517 } 5518 }catch(Exception e){ 5519 errln("Exception while converting BOCU-1 thrown: " + e); 5520 } 5521 } 5522 5523 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5524 @Test TestICUCanonicalNameConsistency()5525 public void TestICUCanonicalNameConsistency() { 5526 String[] alias = { 5527 "KSC_5601" 5528 }; 5529 String[] expected = { 5530 "windows-949-2000" 5531 }; 5532 5533 for (int i = 0; i < alias.length; i++) { 5534 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5535 if (!name.equals(expected[i])) { 5536 errln("The ICU canonical name in ICU4J does not match that in ICU4C. Result: " + name + "Expected: " + expected[i]); 5537 } 5538 } 5539 } 5540 5541 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5542 @Test TestCharsetICUCodeCoverage()5543 public void TestCharsetICUCodeCoverage() { 5544 CharsetProviderICU provider = new CharsetProviderICU(); 5545 5546 if (provider.charsetForName("UTF16", null) != null) { 5547 errln("charsetForName should have returned a null"); 5548 } 5549 5550 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5551 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5552 } 5553 5554 try { 5555 Charset testCharset = CharsetICU.forNameICU("bogus"); 5556 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5557 } catch (UnsupportedCharsetException ex) { 5558 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5559 } 5560 5561 Charset charset = provider.charsetForName("UTF16"); 5562 5563 try { 5564 ((CharsetICU)charset).getUnicodeSet(null, 0); 5565 } catch (IllegalArgumentException ex) { 5566 return; 5567 } 5568 errln("IllegalArgumentException should have been thrown."); 5569 } 5570 5571 @Test TestCharsetLMBCS()5572 public void TestCharsetLMBCS() { 5573 String []lmbcsNames = { 5574 "LMBCS-1", 5575 "LMBCS-2", 5576 "LMBCS-3", 5577 "LMBCS-4", 5578 "LMBCS-5", 5579 "LMBCS-6", 5580 "LMBCS-8", 5581 "LMBCS-11", 5582 "LMBCS-16", 5583 "LMBCS-17", 5584 "LMBCS-18", 5585 "LMBCS-19" 5586 }; 5587 5588 char[] src = { 5589 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5590 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5591 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5592 }; 5593 CharBuffer cbInput = CharBuffer.wrap(src); 5594 5595 CharsetProviderICU provider = new CharsetProviderICU(); 5596 5597 for (int i = 0; i < lmbcsNames.length; i++) { 5598 Charset charset = provider.charsetForName(lmbcsNames[i]); 5599 if (charset == null) { 5600 errln("Unable to create LMBCS charset: " + lmbcsNames[i]); 5601 return; 5602 } 5603 CharsetEncoder encoder = charset.newEncoder(); 5604 CharsetDecoder decoder = charset.newDecoder(); 5605 5606 try { 5607 cbInput.position(0); 5608 ByteBuffer bbTmp = encoder.encode(cbInput); 5609 CharBuffer cbOutput = decoder.decode(bbTmp); 5610 5611 if (!equals(cbInput, cbOutput)) { 5612 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5613 } 5614 } catch (Exception ex) { 5615 if (i >= 8) { 5616 /* Expected exceptions */ 5617 continue; 5618 } 5619 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5620 } 5621 5622 } 5623 5624 // Test malformed 5625 CoderResult malformedResult = CoderResult.UNDERFLOW; 5626 byte[] malformedBytes = { 5627 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5628 }; 5629 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5630 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5631 int[] malformedLimits = { 5632 2, 6 5633 }; 5634 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5635 for (int n = 0; n < malformedLimits.length; n++) { 5636 malformedDecoderTest.reset(); 5637 5638 malformedSrc.position(0); 5639 malformedSrc.limit(malformedLimits[n]); 5640 5641 malformedTrgt.clear(); 5642 5643 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5644 if (!malformedResult.isMalformed()) { 5645 errln("Malformed error should have resulted."); 5646 } 5647 } 5648 } 5649 5650 /* 5651 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5652 * Since there is no concept of ambiguous converters in ICU4J 5653 * this test is merely for code coverage reasons. 5654 */ 5655 @Test TestAmbiguousConverter()5656 public void TestAmbiguousConverter() { 5657 byte [] inBytes = { 5658 0x61, 0x5b, 0x5c 5659 }; 5660 ByteBuffer src = ByteBuffer.wrap(inBytes); 5661 CharBuffer trgt = CharBuffer.allocate(20); 5662 5663 CoderResult result = CoderResult.UNDERFLOW; 5664 CharsetProviderICU provider = new CharsetProviderICU(); 5665 String[] names = CharsetProviderICU.getAllNames(); 5666 5667 for (int i = 0; i < names.length; i++) { 5668 Charset charset = provider.charsetForName(names[i]); 5669 if (charset == null) { 5670 /* We don't care about any failures because not all converters are available. */ 5671 continue; 5672 } 5673 CharsetDecoder decoder = charset.newDecoder(); 5674 5675 src.position(0); 5676 trgt.clear(); 5677 5678 result = decoder.decode(src, trgt, true); 5679 if (result.isError()) { 5680 /* We don't care about any failures. */ 5681 continue; 5682 } 5683 } 5684 } 5685 5686 @Test TestIsFixedWidth()5687 public void TestIsFixedWidth(){ 5688 String[] fixedWidth = { 5689 "US-ASCII", 5690 "UTF32", 5691 "ibm-5478_P100-1995" 5692 }; 5693 5694 String[] notFixedWidth = { 5695 "GB18030", 5696 "UTF8", 5697 "windows-949-2000", 5698 "UTF16" 5699 }; 5700 CharsetProvider provider = new CharsetProviderICU(); 5701 Charset charset; 5702 5703 for (int i = 0; i < fixedWidth.length; i++) { 5704 charset = provider.charsetForName(fixedWidth[i]); 5705 5706 if (!((CharsetICU)charset).isFixedWidth()) { 5707 errln(fixedWidth[i] + " is a fixedWidth charset but returned false."); 5708 } 5709 } 5710 5711 for (int i = 0; i < notFixedWidth.length; i++) { 5712 charset = provider.charsetForName(notFixedWidth[i]); 5713 5714 if (((CharsetICU)charset).isFixedWidth()) { 5715 errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true."); 5716 } 5717 } 5718 } 5719 5720 @Test TestBytesLengthForString()5721 public void TestBytesLengthForString() { 5722 CharsetProviderICU provider = new CharsetProviderICU(); 5723 String[] charsets = { 5724 "windows-949-2000", 5725 "ibm-1047_P100-1995,swaplfnl", 5726 "ibm-930_P120-1999", 5727 "ISCII,version=0", 5728 "ISO_2022,locale=ko,version=0" 5729 }; 5730 5731 int[] expected = { 5732 40, 5733 20, 5734 80, /* changed from 60 to 80 to reflect the updates by #9205 */ 5735 80, 5736 160 5737 }; 5738 5739 int stringLength = 10; 5740 int length; 5741 int maxCharSize; 5742 5743 for (int i = 0; i < charsets.length; i++) { 5744 maxCharSize = (int)provider.charsetForName(charsets[i]).newEncoder().maxBytesPerChar(); 5745 length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize); 5746 5747 if (length != expected[i]) { 5748 errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length); 5749 } 5750 } 5751 } 5752 5753 /* 5754 * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when 5755 * an unmappable character occurs. 5756 * Ticket #8729 5757 */ 5758 @Test TestCharsetASCII8859BufferHandling()5759 public void TestCharsetASCII8859BufferHandling() { 5760 String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n"; 5761 String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. I spoke to the store operator (Ram Reddi –=|\r\n"; 5762 5763 String charsetNames[] = { 5764 "ASCII", 5765 "ISO-8859-1" 5766 }; 5767 5768 CoderResult result = CoderResult.UNDERFLOW; 5769 5770 CharsetEncoder encoder; 5771 5772 ByteBuffer outBuffer = ByteBuffer.allocate(500); 5773 CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length()); 5774 charBuffer.put(firstLine); 5775 charBuffer.put(secondLine); 5776 charBuffer.flip(); 5777 5778 for (int i = 0; i < charsetNames.length; i++) { 5779 encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder(); 5780 5781 charBuffer.position(firstLine.length()); 5782 CharBuffer charBufferSlice = charBuffer.slice(); 5783 charBufferSlice.limit(secondLine.length() - 2); 5784 5785 5786 try { 5787 result = encoder.encode(charBufferSlice, outBuffer, false); 5788 if (!result.isUnmappable()) { 5789 errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result); 5790 } 5791 } catch (IllegalArgumentException ex) { 5792 errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]); 5793 } 5794 } 5795 } 5796 5797 /* 5798 * When converting with the String method getBytes(), buffer overflow exception is thrown because 5799 * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J 5800 * side to match what the Java method is expecting. The ICU4C size will be left unchanged. 5801 * Ticket #9205 5802 */ 5803 @Test TestBufferOverflowErrorUsingJavagetBytes()5804 public void TestBufferOverflowErrorUsingJavagetBytes() { 5805 String charsetName = "ibm-5035"; 5806 String testCase = "\u7d42"; 5807 5808 try { 5809 testCase.getBytes(charsetName); 5810 } catch (Exception ex) { 5811 errln("Error calling getBytes(): " + ex); 5812 } 5813 5814 } 5815 5816 @Test TestDefaultIgnorableCallback()5817 public void TestDefaultIgnorableCallback() { 5818 String cnv_name = "euc-jp-2007"; 5819 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5820 String pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; 5821 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5822 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5823 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5824 5825 // set callback for the converter 5826 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5827 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5828 5829 // test ignorable code points are ignored 5830 int size = set_ignorable.size(); 5831 for (int i = 0; i < size; i++) { 5832 encoder.reset(); 5833 try { 5834 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_ignorable.charAt(i)))).limit() > 0) { 5835 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(set_ignorable.charAt(i))); 5836 } 5837 } catch (Exception ex) { 5838 errln("Error received converting +" + Integer.toHexString(set_ignorable.charAt(i))); 5839 } 5840 } 5841 5842 // test non-ignorable code points are not ignored 5843 size = set_not_ignorable.size(); 5844 for (int i = 0; i < size; i++) { 5845 encoder.reset(); 5846 try { 5847 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_not_ignorable.charAt(i)))).limit() == 0) { 5848 errln("Callback should not have ignored: U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5849 } 5850 } catch (Exception ex) { 5851 errln("Error received converting U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5852 } 5853 } 5854 } 5855 } 5856