1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderResult; 20 import java.nio.charset.CodingErrorAction; 21 import java.nio.charset.UnsupportedCharsetException; 22 import java.nio.charset.spi.CharsetProvider; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Iterator; 26 import java.util.MissingResourceException; 27 import java.util.Set; 28 import java.util.SortedMap; 29 30 import org.junit.Test; 31 import org.junit.runner.RunWith; 32 import org.junit.runners.JUnit4; 33 34 import com.ibm.icu.charset.CharsetCallback; 35 import com.ibm.icu.charset.CharsetDecoderICU; 36 import com.ibm.icu.charset.CharsetEncoderICU; 37 import com.ibm.icu.charset.CharsetICU; 38 import com.ibm.icu.charset.CharsetProviderICU; 39 import com.ibm.icu.dev.test.TestFmwk; 40 import com.ibm.icu.text.UTF16; 41 import com.ibm.icu.text.UnicodeSet; 42 43 @RunWith(JUnit4.class) 44 public class TestCharset extends TestFmwk { 45 @Test TestUTF16Converter()46 public void TestUTF16Converter(){ 47 CharsetProvider icu = new CharsetProviderICU(); 48 Charset cs1 = icu.charsetForName("UTF-16BE"); 49 CharsetEncoder e1 = cs1.newEncoder(); 50 CharsetDecoder d1 = cs1.newDecoder(); 51 52 Charset cs2 = icu.charsetForName("UTF-16LE"); 53 CharsetEncoder e2 = cs2.newEncoder(); 54 CharsetDecoder 
d2 = cs2.newDecoder(); 55 56 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 57 CharBuffer us = CharBuffer.allocate(0xFF*2); 58 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 59 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 60 for(int j=0;j<0xFF; j++){ 61 int c = i+j; 62 63 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 64 continue; 65 } 66 67 if(c>0xFFFF){ 68 char lead = UTF16.getLeadSurrogate(c); 69 char trail = UTF16.getTrailSurrogate(c); 70 if(!UTF16.isLeadSurrogate(lead)){ 71 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 72 continue; 73 } 74 if(!UTF16.isTrailSurrogate(trail)){ 75 errln("trail is not trail!"+trail); 76 continue; 77 } 78 us.put(lead); 79 us.put(trail); 80 bs1.put((byte)(lead>>8)); 81 bs1.put((byte)(lead&0xFF)); 82 bs1.put((byte)(trail>>8)); 83 bs1.put((byte)(trail&0xFF)); 84 85 bs2.put((byte)(lead&0xFF)); 86 bs2.put((byte)(lead>>8)); 87 bs2.put((byte)(trail&0xFF)); 88 bs2.put((byte)(trail>>8)); 89 }else{ 90 91 if(c<0xFF){ 92 bs1.put((byte)0x00); 93 bs1.put((byte)(c)); 94 bs2.put((byte)(c)); 95 bs2.put((byte)0x00); 96 }else{ 97 bs1.put((byte)(c>>8)); 98 bs1.put((byte)(c&0xFF)); 99 100 bs2.put((byte)(c&0xFF)); 101 bs2.put((byte)(c>>8)); 102 } 103 us.put((char)c); 104 } 105 } 106 107 108 us.limit(us.position()); 109 us.position(0); 110 if(us.length()==0){ 111 continue; 112 } 113 114 115 bs1.limit(bs1.position()); 116 bs1.position(0); 117 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 118 //newBS.put((byte)0xFE); 119 //newBS.put((byte)0xFF); 120 newBS.put(bs1); 121 bs1.position(0); 122 smBufDecode(d1, "UTF-16", bs1, us); 123 smBufEncode(e1, "UTF-16", us, newBS); 124 125 bs2.limit(bs2.position()); 126 bs2.position(0); 127 newBS.clear(); 128 //newBS.put((byte)0xFF); 129 //newBS.put((byte)0xFE); 130 newBS.put(bs2); 131 bs2.position(0); 132 smBufDecode(d2, "UTF16-LE", bs2, us); 133 smBufEncode(e2, "UTF-16LE", us, newBS); 134 135 } 136 } 137 138 @Test TestUTF32Converter()139 public void TestUTF32Converter(){ 140 CharsetProvider icu = 
new CharsetProviderICU(); 141 Charset cs1 = icu.charsetForName("UTF-32BE"); 142 CharsetEncoder e1 = cs1.newEncoder(); 143 CharsetDecoder d1 = cs1.newDecoder(); 144 145 Charset cs2 = icu.charsetForName("UTF-32LE"); 146 CharsetEncoder e2 = cs2.newEncoder(); 147 CharsetDecoder d2 = cs2.newDecoder(); 148 149 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 150 CharBuffer us = CharBuffer.allocate(0xFF*2); 151 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 152 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 153 for(int j=0;j<0xFF; j++){ 154 int c = i+j; 155 156 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 157 continue; 158 } 159 160 if(c>0xFFFF){ 161 char lead = UTF16.getLeadSurrogate(c); 162 char trail = UTF16.getTrailSurrogate(c); 163 164 us.put(lead); 165 us.put(trail); 166 }else{ 167 us.put((char)c); 168 } 169 bs1.put((byte) (c >>> 24)); 170 bs1.put((byte) (c >>> 16)); 171 bs1.put((byte) (c >>> 8)); 172 bs1.put((byte) (c & 0xFF)); 173 174 bs2.put((byte) (c & 0xFF)); 175 bs2.put((byte) (c >>> 8)); 176 bs2.put((byte) (c >>> 16)); 177 bs2.put((byte) (c >>> 24)); 178 } 179 bs1.limit(bs1.position()); 180 bs1.position(0); 181 bs2.limit(bs2.position()); 182 bs2.position(0); 183 us.limit(us.position()); 184 us.position(0); 185 if(us.length()==0){ 186 continue; 187 } 188 189 190 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 191 192 newBS.put((byte)0x00); 193 newBS.put((byte)0x00); 194 newBS.put((byte)0xFE); 195 newBS.put((byte)0xFF); 196 197 newBS.put(bs1); 198 bs1.position(0); 199 smBufDecode(d1, "UTF-32", bs1, us); 200 smBufEncode(e1, "UTF-32", us, newBS); 201 202 203 newBS.clear(); 204 205 newBS.put((byte)0xFF); 206 newBS.put((byte)0xFE); 207 newBS.put((byte)0x00); 208 newBS.put((byte)0x00); 209 210 newBS.put(bs2); 211 bs2.position(0); 212 smBufDecode(d2, "UTF-32LE", bs2, us); 213 smBufEncode(e2, "UTF-32LE", us, newBS); 214 215 } 216 } 217 218 @Test TestASCIIConverter()219 public void TestASCIIConverter() { 220 runTestASCIIBasedConverter("ASCII", 0x80); 221 } 222 223 @Test 
Test88591Converter()224 public void Test88591Converter() { 225 runTestASCIIBasedConverter("iso-8859-1", 0x100); 226 } 227 runTestASCIIBasedConverter(String converter, int limit)228 public void runTestASCIIBasedConverter(String converter, int limit){ 229 CharsetProvider icu = new CharsetProviderICU(); 230 Charset icuChar = icu.charsetForName(converter); 231 CharsetEncoder encoder = icuChar.newEncoder(); 232 CharsetDecoder decoder = icuChar.newDecoder(); 233 CoderResult cr; 234 235 /* test with and without array-backed buffers */ 236 237 byte[] bytes = new byte[0x10000]; 238 char[] chars = new char[0x10000]; 239 for (int j = 0; j <= 0xffff; j++) { 240 bytes[j] = (byte) j; 241 chars[j] = (char) j; 242 } 243 244 boolean fail = false; 245 boolean arrays = false; 246 boolean decoding = false; 247 int i; 248 249 // 0 thru limit - 1 250 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 251 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 252 smBufDecode(decoder, converter, bs, us, true); 253 smBufDecode(decoder, converter, bs, us, false); 254 smBufEncode(encoder, converter, us, bs, true); 255 smBufEncode(encoder, converter, us, bs, false); 256 for (i = 0; i < limit; i++) { 257 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 258 us = CharBuffer.wrap(chars, i, 1).slice(); 259 try { 260 decoding = true; 261 arrays = true; 262 smBufDecode(decoder, converter, bs, us, true, false, true); 263 264 decoding = true; 265 arrays = false; 266 smBufDecode(decoder, converter, bs, us, true, false, false); 267 268 decoding = false; 269 arrays = true; 270 smBufEncode(encoder, converter, us, bs, true, false, true); 271 272 decoding = false; 273 arrays = false; 274 smBufEncode(encoder, converter, us, bs, true, false, false); 275 276 } catch (Exception ex) { 277 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 278 + Integer.toHexString(i) + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 279 return; 280 } 281 } 282 283 // decode limit thru 255 284 for (i = limit; i <= 0xff; i++) { 285 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 286 us = CharBuffer.wrap(chars, i, 1).slice(); 287 try { 288 smBufDecode(decoder, converter, bs, us, true, false, true); 289 fail = true; 290 arrays = true; 291 break; 292 } catch (Exception ex) { 293 } 294 try { 295 smBufDecode(decoder, converter, bs, us, true, false, false); 296 fail = true; 297 arrays = false; 298 break; 299 } catch (Exception ex) { 300 } 301 } 302 if (fail) { 303 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 304 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 305 return; 306 } 307 308 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 309 // time (it would take too much time to test every possible case) 310 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 311 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 312 us = CharBuffer.wrap(chars, i, 1).slice(); 313 try { 314 smBufEncode(encoder, converter, us, bs, true, false, true); 315 fail = true; 316 arrays = true; 317 break; 318 } catch (Exception ex) { 319 } 320 try { 321 smBufEncode(encoder, converter, us, bs, true, false, false); 322 fail = true; 323 arrays = false; 324 break; 325 } catch (Exception ex) { 326 } 327 } 328 if (fail) { 329 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 330 + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 331 return; 332 } 333 334 // test overflow / underflow edge cases 335 outer: for (int n = 1; n <= 3; n++) { 336 for (int m = 0; m < n; m++) { 337 // expecting underflow 338 try { 339 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 340 us = CharBuffer.wrap(chars, 'a', m).slice(); 341 smBufDecode(decoder, converter, bs, us, true, false, true); 342 smBufDecode(decoder, converter, bs, us, true, false, false); 343 smBufEncode(encoder, converter, us, bs, true, false, true); 344 smBufEncode(encoder, converter, us, bs, true, false, false); 345 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 346 us = CharBuffer.wrap(chars, 'a', n).slice(); 347 smBufDecode(decoder, converter, bs, us, true, false, true, m); 348 smBufDecode(decoder, converter, bs, us, true, false, false, m); 349 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 350 us = CharBuffer.wrap(chars, 'a', m).slice(); 351 smBufEncode(encoder, converter, us, bs, true, false, true, m); 352 smBufEncode(encoder, converter, us, bs, true, false, false, m); 353 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 354 us = CharBuffer.wrap(chars, 'a', n).slice(); 355 smBufDecode(decoder, converter, bs, us, true, false, true); 356 smBufDecode(decoder, converter, bs, us, true, false, false); 357 smBufEncode(encoder, converter, us, bs, true, false, true); 358 smBufEncode(encoder, converter, us, bs, true, false, false); 359 } catch (Exception ex) { 360 fail = true; 361 break outer; 362 } 363 364 // expecting overflow 365 try { 366 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 367 us = CharBuffer.wrap(chars, 'a', m).slice(); 368 smBufDecode(decoder, converter, bs, us, true, false, true); 369 fail = true; 370 break; 371 } catch (Exception ex) { 372 if (!(ex instanceof BufferOverflowException)) { 373 fail = true; 374 break outer; 375 } 376 } 377 try { 378 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 379 us = CharBuffer.wrap(chars, 'a', m).slice(); 380 smBufDecode(decoder, converter, bs, us, true, 
false, false); 381 fail = true; 382 } catch (Exception ex) { 383 if (!(ex instanceof BufferOverflowException)) { 384 fail = true; 385 break outer; 386 } 387 } 388 try { 389 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 390 us = CharBuffer.wrap(chars, 'a', n).slice(); 391 smBufEncode(encoder, converter, us, bs, true, false, true); 392 fail = true; 393 } catch (Exception ex) { 394 if (!(ex instanceof BufferOverflowException)) { 395 fail = true; 396 break outer; 397 } 398 } 399 try { 400 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 401 us = CharBuffer.wrap(chars, 'a', n).slice(); 402 smBufEncode(encoder, converter, us, bs, true, false, false); 403 fail = true; 404 } catch (Exception ex) { 405 if (!(ex instanceof BufferOverflowException)) { 406 fail = true; 407 break outer; 408 } 409 } 410 } 411 } 412 if (fail) { 413 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 414 return; 415 } 416 417 // test surrogate combinations in encoding 418 String lead = "\ud888"; 419 String trail = "\udc88"; 420 String norm = "a"; 421 String ext = "\u0275"; // theta 422 String end = ""; 423 bs = ByteBuffer.wrap(new byte[] { 0 }); 424 String[] input = new String[] { // 425 lead + lead, // malf(1) 426 lead + trail, // unmap(2) 427 lead + norm, // malf(1) 428 lead + ext, // malf(1) 429 lead + end, // malf(1) 430 trail + norm, // malf(1) 431 trail + end, // malf(1) 432 ext + norm, // unmap(1) 433 ext + end, // unmap(1) 434 }; 435 CoderResult[] result = new CoderResult[] { 436 CoderResult.malformedForLength(1), 437 CoderResult.unmappableForLength(2), 438 CoderResult.malformedForLength(1), 439 CoderResult.malformedForLength(1), 440 CoderResult.malformedForLength(1), 441 CoderResult.malformedForLength(1), 442 CoderResult.malformedForLength(1), 443 CoderResult.unmappableForLength(1), 444 CoderResult.unmappableForLength(1), 445 }; 446 447 for (int index = 0; index < input.length; index++) { 448 CharBuffer source = CharBuffer.wrap(input[index]); 449 cr = 
encoder.encode(source, bs, true); 450 bs.rewind(); 451 encoder.reset(); 452 453 // if cr != results[x] 454 if (!((cr.isUnderflow() && result[index].isUnderflow()) 455 || (cr.isOverflow() && result[index].isOverflow()) 456 || (cr.isMalformed() && result[index].isMalformed()) 457 || (cr.isUnmappable() && result[index].isUnmappable())) 458 || (cr.isError() && cr.length() != result[index].length())) { 459 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 460 break; 461 } 462 463 source = CharBuffer.wrap(input[index].toCharArray()); 464 cr = encoder.encode(source, bs, true); 465 bs.rewind(); 466 encoder.reset(); 467 468 // if cr != results[x] 469 if (!((cr.isUnderflow() && result[index].isUnderflow()) 470 || (cr.isOverflow() && result[index].isOverflow()) 471 || (cr.isMalformed() && result[index].isMalformed()) 472 || (cr.isUnmappable() && result[index].isUnmappable())) 473 || (cr.isError() && cr.length() != result[index].length())) { 474 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 475 break; 476 } 477 } 478 } 479 480 @Test TestUTF8Converter()481 public void TestUTF8Converter() { 482 String converter = "UTF-8"; 483 CharsetProvider icu = new CharsetProviderICU(); 484 Charset icuChar = icu.charsetForName(converter); 485 CharsetEncoder encoder = icuChar.newEncoder(); 486 CharsetDecoder decoder = icuChar.newDecoder(); 487 ByteBuffer bs; 488 CharBuffer us; 489 CoderResult cr; 490 491 492 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 493 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 494 char[] chars = new char[size[0] + size[1] + size[2]]; 495 int i = 0; 496 int x, y; 497 498 // 0 to 1 << 7 (1 byters) 499 for (; i < size[0]; i++) { 500 bytes[i] = (byte) i; 501 chars[i] = (char) i; 502 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 503 us = CharBuffer.wrap(chars, i, 
1).slice(); 504 try { 505 smBufDecode(decoder, converter, bs, us, true, false, true); 506 smBufDecode(decoder, converter, bs, us, true, false, false); 507 smBufEncode(encoder, converter, us, bs, true, false, true); 508 smBufEncode(encoder, converter, us, bs, true, false, false); 509 } catch (Exception ex) { 510 errln("Incorrect result in " + converter + " for 0x" 511 + Integer.toHexString(i)); 512 break; 513 } 514 } 515 516 // 1 << 7 to 1 << 11 (2 byters) 517 for (; i < size[1]; i++) { 518 x = size[0] + i*2; 519 y = size[0] + i; 520 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 521 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 522 chars[y] = (char) i; 523 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 524 us = CharBuffer.wrap(chars, y, 1).slice(); 525 try { 526 smBufDecode(decoder, converter, bs, us, true, false, true); 527 smBufDecode(decoder, converter, bs, us, true, false, false); 528 smBufEncode(encoder, converter, us, bs, true, false, true); 529 smBufEncode(encoder, converter, us, bs, true, false, false); 530 } catch (Exception ex) { 531 errln("Incorrect result in " + converter + " for 0x" 532 + Integer.toHexString(i)); 533 break; 534 } 535 } 536 537 // 1 << 11 to 1 << 16 (3 byters and surrogates) 538 for (; i < size[2]; i++) { 539 x = size[0] + size[1] * 2 + i * 3; 540 y = size[0] + size[1] + i; 541 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 542 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 543 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 544 chars[y] = (char) i; 545 if (!UTF16.isSurrogate((char)i)) { 546 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 547 us = CharBuffer.wrap(chars, y, 1).slice(); 548 try { 549 smBufDecode(decoder, converter, bs, us, true, false, true); 550 smBufDecode(decoder, converter, bs, us, true, false, false); 551 smBufEncode(encoder, converter, us, bs, true, false, true); 552 smBufEncode(encoder, converter, us, bs, true, false, false); 553 } catch (Exception ex) { 554 errln("Incorrect result in " + converter + " for 
0x" 555 + Integer.toHexString(i)); 556 break; 557 } 558 } else { 559 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 560 us = CharBuffer.wrap(chars, y, 1).slice(); 561 562 decoder.reset(); 563 cr = decoder.decode(bs, us, true); 564 bs.rewind(); 565 us.rewind(); 566 if (!cr.isMalformed() || cr.length() != 1) { 567 errln("Incorrect result in " + converter + " decoder for 0x" 568 + Integer.toHexString(i) + " received " + cr); 569 break; 570 } 571 encoder.reset(); 572 cr = encoder.encode(us, bs, true); 573 bs.rewind(); 574 us.rewind(); 575 if (!cr.isMalformed() || cr.length() != 1) { 576 errln("Incorrect result in " + converter + " encoder for 0x" 577 + Integer.toHexString(i) + " received " + cr); 578 break; 579 } 580 581 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 582 us = CharBuffer.wrap(new String(chars, y, 1)); 583 584 decoder.reset(); 585 cr = decoder.decode(bs, us, true); 586 bs.rewind(); 587 us.rewind(); 588 if (!cr.isMalformed() || cr.length() != 1) { 589 errln("Incorrect result in " + converter + " decoder for 0x" 590 + Integer.toHexString(i) + " received " + cr); 591 break; 592 } 593 encoder.reset(); 594 cr = encoder.encode(us, bs, true); 595 bs.rewind(); 596 us.rewind(); 597 if (!cr.isMalformed() || cr.length() != 1) { 598 errln("Incorrect result in " + converter + " encoder for 0x" 599 + Integer.toHexString(i) + " received " + cr); 600 break; 601 } 602 603 604 } 605 } 606 if (true) 607 return; 608 } 609 610 @Test TestHZ()611 public void TestHZ() { 612 /* test input */ 613 char[] in = new char[] { 614 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 615 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 616 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 617 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 618 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 619 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 
0x2493, 0x2494, 0x2495, 0x2496, 620 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 621 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 622 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 623 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 624 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 625 0x005A, 0x005B, 0x005C, 0x000A 626 }; 627 628 String converter = "HZ"; 629 CharsetProvider icu = new CharsetProviderICU(); 630 Charset icuChar = icu.charsetForName(converter); 631 CharsetEncoder encoder = icuChar.newEncoder(); 632 CharsetDecoder decoder = icuChar.newDecoder(); 633 try { 634 CharBuffer start = CharBuffer.wrap(in); 635 ByteBuffer bytes = encoder.encode(start); 636 CharBuffer finish = decoder.decode(bytes); 637 638 if (!equals(start, finish)) { 639 errln(converter + " roundtrip test failed: start does not match finish"); 640 641 char[] finishArray = new char[finish.limit()]; 642 for (int i=0; i<finishArray.length; i++) 643 finishArray[i] = finish.get(i); 644 645 logln("start: " + hex(in)); 646 logln("finish: " + hex(finishArray)); 647 } 648 } catch (CharacterCodingException ex) { 649 // Android patch: Skip tests that fail with customized data. 650 logln(converter + " roundtrip test failed: " + ex.getMessage()); 651 // Android patch end. 
652 ex.printStackTrace(System.err); 653 } 654 655 /* For better code coverage */ 656 CoderResult result = CoderResult.UNDERFLOW; 657 byte byteout[] = { 658 (byte)0x7e, (byte)0x7d, (byte)0x41, 659 (byte)0x7e, (byte)0x7b, (byte)0x21, 660 }; 661 char charin[] = { 662 (char)0x0041, (char)0x0042, (char)0x3000 663 }; 664 ByteBuffer bb = ByteBuffer.wrap(byteout); 665 CharBuffer cb = CharBuffer.wrap(charin); 666 int testLoopSize = 5; 667 int bbLimits[] = { 0, 1, 3, 4, 6}; 668 int bbPositions[] = { 0, 0, 0, 3, 3 }; 669 int ccPositions[] = { 0, 0, 0, 2, 2 }; 670 for (int i = 0; i < testLoopSize; i++) { 671 encoder.reset(); 672 bb.limit(bbLimits[i]); 673 bb.position(bbPositions[i]); 674 cb.position(ccPositions[i]); 675 result = encoder.encode(cb, bb, true); 676 677 if (i < 3) { 678 if (!result.isOverflow()) { 679 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 680 } 681 } else { 682 if (result.isError()) { 683 // Android patch: Skip tests that fail with customized data. 684 logln("Error should not have occurred while encoding HZ.(" + i + ")"); 685 // Android patch end. 
686 } 687 } 688 } 689 } 690 691 @Test TestUTF8Surrogates()692 public void TestUTF8Surrogates() { 693 byte[][] in = new byte[][] { 694 { (byte)0x61, }, 695 { (byte)0xc2, (byte)0x80, }, 696 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 697 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 698 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 699 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 700 }; 701 702 /* expected test results */ 703 char[][] results = new char[][] { 704 /* number of bytes read, code point */ 705 { '\u0061', }, 706 { '\u0080', }, 707 { '\u0800', }, 708 { '\ud800', '\udc00', }, // 10000 709 { '\udbd0', '\udf21', }, // 104321 710 { '\ud801', '\udc01', }, // 10401 711 }; 712 713 /* error test input */ 714 byte[][] in2 = new byte[][] { 715 { (byte)0x61, }, 716 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 717 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 718 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 719 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 720 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 721 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 722 (byte)0xfe, /* illegal byte altogether */ 723 (byte)0x62, }, 724 }; 725 726 /* expected error test results */ 727 char[][] results2 = new char[][] { 728 /* number of bytes read, code point */ 729 { '\u0062', }, 730 { '\u0062', }, 731 }; 732 733 String converter = "UTF-8"; 734 CharsetProvider icu = new CharsetProviderICU(); 735 Charset icuChar = icu.charsetForName(converter); 736 CharsetDecoder decoder = icuChar.newDecoder(); 737 738 int i; 739 try { 740 for (i = 0; i < in.length; i++) { 741 ByteBuffer source = ByteBuffer.wrap(in[i]); 742 CharBuffer expected = CharBuffer.wrap(results[i]); 743 smBufDecode(decoder, converter, source, expected, true, false, 744 true); 745 smBufDecode(decoder, converter, source, expected, true, false, 746 false); 747 } 748 } catch 
(Exception ex) { 749 errln("Incorrect result in " + converter); 750 } 751 try { 752 for (i = 0; i < in2.length; i++) { 753 ByteBuffer source = ByteBuffer.wrap(in2[i]); 754 CharBuffer expected = CharBuffer.wrap(results2[i]); 755 decoder.onMalformedInput(CodingErrorAction.IGNORE); 756 smBufDecode(decoder, converter, source, expected, true, false, 757 true); 758 smBufDecode(decoder, converter, source, expected, true, false, 759 false); 760 } 761 } catch (Exception ex) { 762 errln("Incorrect result in " + converter); 763 } 764 } 765 766 @Test TestSurrogateBehavior()767 public void TestSurrogateBehavior() { 768 CharsetProviderICU icu = new CharsetProviderICU(); 769 770 // get all the converters into an array 771 Object[] converters = CharsetProviderICU.getAvailableNames(); 772 773 String norm = "a"; 774 String ext = "\u0275"; // theta 775 String lead = "\ud835"; 776 String trail = "\udd04"; 777 // lead + trail = \U1d504 (fraktur capital A) 778 779 String input = 780 // error position 781 ext // unmap(1) 1 782 + lead // under 1 783 + lead // malf(1) 2 784 + trail // unmap(2) 4 785 + trail // malf(1) 5 786 + ext // unmap(1) 6 787 + norm // unmap(1) 7 788 ; 789 CoderResult[] results = new CoderResult[] { 790 CoderResult.unmappableForLength(1), // or underflow 791 CoderResult.UNDERFLOW, 792 CoderResult.malformedForLength(1), 793 CoderResult.unmappableForLength(2), // or underflow 794 CoderResult.malformedForLength(1), 795 CoderResult.unmappableForLength(1), // or underflow 796 CoderResult.unmappableForLength(1), // or underflow 797 }; 798 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 799 int n = positions.length; 800 801 int badcount = 0; 802 int goodcount = 0; 803 int[] uhohindices = new int[n]; 804 int[] badposindices = new int[n]; 805 int[] malfindices = new int[n]; 806 int[] unmapindices = new int[n]; 807 ArrayList pass = new ArrayList(); 808 ArrayList exempt = new ArrayList(); 809 810 outer: for (int conv=0; conv<converters.length; conv++) { 811 String converter = 
(String)converters[conv]; 812 if (converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 813 converter.equals("x-LMBCS-1")) { 814 exempt.add(converter); 815 continue; 816 } 817 818 boolean currentlybad = false; 819 Charset icuChar = icu.charsetForName(converter); 820 CharsetEncoder encoder = icuChar.newEncoder(); 821 CoderResult cr; 822 823 CharBuffer source = CharBuffer.wrap(input); 824 ByteBuffer target = ByteBuffer.allocate(30); 825 ByteBuffer expected = null; 826 try { 827 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 828 encoder.onMalformedInput(CodingErrorAction.IGNORE); 829 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 830 encoder.reset(); 831 } catch (CharacterCodingException ex) { 832 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 833 return; 834 } catch (RuntimeException ex) { 835 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 836 // Android patch: Skip tests that fail with customized data. 837 logln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 838 // Android patch end. 
839 continue outer; 840 } 841 842 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 843 encoder.onMalformedInput(CodingErrorAction.REPORT); 844 for (int i=0; i<n; i++) { 845 source.limit(i+1); 846 cr = encoder.encode(source, target, i == n - 1); 847 if (!(equals(cr, results[i]) 848 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 849 )) { 850 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 851 if (results[i].isMalformed() && cr.isMalformed()) { 852 malfindices[i]++; 853 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 854 unmapindices[i]++; 855 } else { 856 uhohindices[i]++; 857 } 858 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 859 } 860 if (source.position() != positions[i]) { 861 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 862 badposindices[i]++; 863 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 864 } 865 866 } 867 encoder.reset(); 868 869 //System.out.println("\n" + hex(target.array())); 870 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 871 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 872 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 873 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 874 } 875 876 if (!currentlybad) { 877 goodcount++; 878 pass.add(converter); 879 } 880 } 881 882 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 883 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 884 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 885 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 886 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); 
logln(""); 887 log("uhoh \t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 888 logln(""); 889 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 890 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 891 } 892 893 // public void TestCharsetCallback() { 894 // String currentTest = "initialization"; 895 // try { 896 // Class[] params; 897 // 898 // // get the classes 899 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 900 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 901 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 902 // 903 // // set up encoderCall 904 // params = new Class[] {CharsetEncoderICU.class, Object.class, 905 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 906 // char[].class, int.class, int.class, CoderResult.class }; 907 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 908 // 909 // // set up decoderCall 910 // params = new Class[] {CharsetDecoderICU.class, Object.class, 911 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 912 // char[].class, int.class, CoderResult.class}; 913 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 914 // 915 // // get relevant fields 916 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 917 // 918 // // set up a few arguments 919 // CharsetProvider provider = new CharsetProviderICU(); 920 // Charset charset = provider.charsetForName("UTF-8"); 921 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 922 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 923 // CharBuffer chars = CharBuffer.allocate(10); 924 // chars.put('o'); 925 // chars.put('k'); 926 // ByteBuffer bytes = ByteBuffer.allocate(10); 927 // bytes.put((byte)'o'); 928 // bytes.put((byte)'k'); 929 // 
IntBuffer offsets = IntBuffer.allocate(10); 930 // offsets.put(0); 931 // offsets.put(1); 932 // char[] buffer = null; 933 // Integer length = new Integer(2); 934 // Integer cp = new Integer(0); 935 // CoderResult unmap = CoderResult.unmappableForLength(2); 936 // CoderResult malf = CoderResult.malformedForLength(2); 937 // CoderResult under = CoderResult.UNDERFLOW; 938 // 939 // // set up error arrays 940 // Integer invalidCharLength = new Integer(1); 941 // Byte subChar1 = new Byte((byte)0); 942 // Byte subChar1_alternate = new Byte((byte)1); // for TO_U_CALLBACK_SUBSTITUTE 943 // 944 // // set up chars and bytes backups and expected values for certain cases 945 // CharBuffer charsBackup = bufferCopy(chars); 946 // ByteBuffer bytesBackup = bufferCopy(bytes); 947 // IntBuffer offsetsBackup = bufferCopy(offsets); 948 // CharBuffer encoderCharsExpected = bufferCopy(chars); 949 // ByteBuffer encoderBytesExpected = bufferCopy(bytes); 950 // IntBuffer encoderOffsetsExpected = bufferCopy(offsets); 951 // CharBuffer decoderCharsExpected1 = bufferCopy(chars); 952 // CharBuffer decoderCharsExpected2 = bufferCopy(chars); 953 // IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets); 954 // IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets); 955 // 956 // // initialize fields to obtain expected data 957 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 958 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1); 959 // 960 // // run cbFromUWriteSub 961 // Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class}); 962 // cbFromUWriteSub.setAccessible(true); 963 // CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected}); 964 // 965 // // run toUWriteUChars with 
normal data 966 // Method toUWriteUChars = CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class}); 967 // toUWriteUChars.setAccessible(true); 968 // CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, new Integer(0), new Integer(1), decoderCharsExpected1, decoderOffsetsExpected1, new Integer(bytes.position())}); 969 // 970 // // reset certain fields 971 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 972 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate); 973 // 974 // // run toUWriteUChars again 975 // CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, new Integer(0), new Integer(1), decoderCharsExpected2, decoderOffsetsExpected2, new Integer(bytes.position())}); 976 // 977 // // begin creating the tests array 978 // ArrayList tests = new ArrayList(); 979 // 980 // // create tests for FROM_U_CALLBACK_SKIP 0 981 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 982 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 983 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 984 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, 
bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 985 // 986 // // create tests for TO_U_CALLBACK_SKIP 4 987 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 988 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 989 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 990 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 991 // 992 // // create tests for FROM_U_CALLBACK_STOP 8 993 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 994 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 995 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 996 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { 
encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 997 // 998 // // create tests for TO_U_CALLBACK_STOP 12 999 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1000 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1001 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1002 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1003 // 1004 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 1005 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1006 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1007 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, 
encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1008 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1009 // 1010 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1011 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1012 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1013 // 1014 // Iterator iter = tests.iterator(); 1015 // for (int i=0; iter.hasNext(); i++) { 1016 // // get the data out of the map 1017 // Object[] next = (Object[])iter.next(); 1018 // 1019 // Method method = (Method)next[0]; 1020 // String fieldName = (String)next[1]; 1021 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1022 // Object[] args = (Object[])next[2]; 1023 // CoderResult expected = (CoderResult)next[3]; 1024 // CharBuffer charsExpected = (CharBuffer)next[4]; 1025 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1026 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1027 // 1028 // // set up error arrays and certain fields 1029 // Object[] values = (Object[])next[7]; 1030 // if (method == decoderCall) { 1031 // decoder.reset(); 1032 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1033 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1034 // } else if (method == encoderCall) { 1035 // 
encoder.reset(); 1036 // } 1037 // 1038 // try { 1039 // // invoke the method 1040 // CoderResult actual = (CoderResult)method.invoke(field, args); 1041 // 1042 // // if expected != actual 1043 // if (!coderResultsEqual(expected, actual)) { 1044 // // case #i refers to the index in the arraylist tests 1045 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1046 // } 1047 // // if the expected buffers != actual buffers 1048 // else if (!(buffersEqual(chars, charsExpected) && 1049 // buffersEqual(bytes, bytesExpected) && 1050 // buffersEqual(offsets, offsetsExpected))) { 1051 // // case #i refers to the index in the arraylist tests 1052 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1053 // } 1054 // } catch (InvocationTargetException ex) { 1055 // // case #i refers to the index in the arraylist tests 1056 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1057 // //ex.getCause().printStackTrace(); 1058 // } 1059 // 1060 // // reset the buffers 1061 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1062 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1063 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1064 // bytes.position(bytesBackup.position()); 1065 // chars.position(charsBackup.position()); 1066 // offsets.position(offsetsBackup.position()); 1067 // } 1068 // 1069 // } catch (Exception ex) { 1070 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1071 // ex.printStackTrace(); 1072 // } 1073 // } 1074 // 1075 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1076 // Field field = c.getDeclaredField(name); 1077 // field.setAccessible(true); 1078 // return field.get(instance); 1079 // } 1080 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1081 // Field field = c.getDeclaredField(name); 
1082 // field.setAccessible(true); 1083 // if (value instanceof Boolean) 1084 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1085 // else if (value instanceof Byte) 1086 // field.setByte(instance, ((Byte)value).byteValue()); 1087 // else if (value instanceof Character) 1088 // field.setChar(instance, ((Character)value).charValue()); 1089 // else if (value instanceof Double) 1090 // field.setDouble(instance, ((Double)value).doubleValue()); 1091 // else if (value instanceof Float) 1092 // field.setFloat(instance, ((Float)value).floatValue()); 1093 // else if (value instanceof Integer) 1094 // field.setInt(instance, ((Integer)value).intValue()); 1095 // else if (value instanceof Long) 1096 // field.setLong(instance, ((Long)value).longValue()); 1097 // else if (value instanceof Short) 1098 // field.setShort(instance, ((Short)value).shortValue()); 1099 // else 1100 // field.set(instance, value); 1101 // } 1102 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1103 // if (a == null && b == null) 1104 // return true; 1105 // if (a == null || b == null) 1106 // return false; 1107 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1108 // return true; 1109 // if (a.length() != b.length()) 1110 // return false; 1111 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1112 // return true; 1113 // return false; 1114 // } 1115 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1116 // if (a.position() != b.position()) 1117 // return false; 1118 // int limit = a.position(); 1119 // for (int i=0; i<limit; i++) 1120 // if (a.get(i) != b.get(i)) 1121 // return false; 1122 // return true; 1123 // } 1124 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1125 // if (a.position() != b.position()) 1126 // return false; 1127 // int limit = a.position(); 1128 // for (int i=0; i<limit; i++) 1129 // if (a.get(i) != b.get(i)) 1130 // return false; 1131 // return true; 
1132 // } 1133 // private boolean buffersEqual(IntBuffer a, IntBuffer b) { 1134 // if (a.position() != b.position()) 1135 // return false; 1136 // int limit = a.position(); 1137 // for (int i=0; i<limit; i++) 1138 // if (a.get(i) != b.get(i)) 1139 // return false; 1140 // return true; 1141 // } 1142 // private ByteBuffer bufferCopy(ByteBuffer src) { 1143 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1144 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1145 // dest.position(src.position()); 1146 // return dest; 1147 // } 1148 // private CharBuffer bufferCopy(CharBuffer src) { 1149 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1150 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1151 // dest.position(src.position()); 1152 // return dest; 1153 // } 1154 // private IntBuffer bufferCopy(IntBuffer src) { 1155 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1156 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1157 // dest.position(src.position()); 1158 // return dest; 1159 // } 1160 1161 1162 @Test TestAPISemantics( )1163 public void TestAPISemantics(/*String encoding*/) { 1164 String encoding = "UTF-16"; 1165 CharsetDecoder decoder = null; 1166 CharsetEncoder encoder = null; 1167 try { 1168 CharsetProviderICU provider = new CharsetProviderICU(); 1169 Charset charset = provider.charsetForName(encoding); 1170 decoder = charset.newDecoder(); 1171 encoder = charset.newEncoder(); 1172 } catch(MissingResourceException ex) { 1173 warnln("Could not load charset data: " + encoding); 1174 return; 1175 } 1176 1177 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1178 final byte[] byteStr = { 1179 (byte) 0x00,(byte) 'a', 1180 (byte) 0x00,(byte) 'b', 1181 (byte) 0x00,(byte) 'c', 1182 (byte) 0x00,(byte) 'd', 1183 (byte) 0xd8,(byte) 0x00, 1184 (byte) 0xdc,(byte) 0x00, 1185 (byte) 0x12,(byte) 0x34, 1186 (byte) 0x00,(byte) 0xa5, 1187 (byte) 0x30,(byte) 0x00, 1188 (byte) 0x00,(byte) 0x0d, 1189 
(byte) 0x00,(byte) 0x0a 1190 }; 1191 final byte[] expectedByteStr = { 1192 (byte) 0xfe,(byte) 0xff, 1193 (byte) 0x00,(byte) 'a', 1194 (byte) 0x00,(byte) 'b', 1195 (byte) 0x00,(byte) 'c', 1196 (byte) 0x00,(byte) 'd', 1197 (byte) 0xd8,(byte) 0x00, 1198 (byte) 0xdc,(byte) 0x00, 1199 (byte) 0x12,(byte) 0x34, 1200 (byte) 0x00,(byte) 0xa5, 1201 (byte) 0x30,(byte) 0x00, 1202 (byte) 0x00,(byte) 0x0d, 1203 (byte) 0x00,(byte) 0x0a 1204 }; 1205 1206 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1207 CharBuffer uniVal = CharBuffer.wrap(unistr); 1208 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1209 1210 int rc = 0; 1211 if(decoder==null){ 1212 warnln("Could not load decoder."); 1213 return; 1214 } 1215 decoder.reset(); 1216 /* Convert the whole buffer to Unicode */ 1217 try { 1218 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1219 CoderResult result = decoder.decode(byes, chars, false); 1220 1221 if (result.isError()) { 1222 errln("ToChars encountered Error"); 1223 rc = 1; 1224 } 1225 if (result.isOverflow()) { 1226 errln("ToChars encountered overflow exception"); 1227 rc = 1; 1228 } 1229 if (!equals(chars, unistr)) { 1230 errln("ToChars does not match"); 1231 printchars(chars); 1232 errln("Expected : "); 1233 printchars(unistr); 1234 rc = 2; 1235 } 1236 1237 } catch (Exception e) { 1238 errln("ToChars - exception in buffer"); 1239 rc = 5; 1240 } 1241 1242 /* Convert single bytes to Unicode */ 1243 try { 1244 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1245 ByteBuffer b = ByteBuffer.wrap(byteStr); 1246 decoder.reset(); 1247 CoderResult result=null; 1248 for (int i = 1; i <= byteStr.length; i++) { 1249 b.limit(i); 1250 result = decoder.decode(b, chars, false); 1251 if(result.isOverflow()){ 1252 errln("ToChars single threw an overflow exception"); 1253 } 1254 if (result.isError()) { 1255 errln("ToChars single the result is an error "+result.toString()); 1256 } 1257 } 1258 if (unistr.length() != (chars.limit())) { 1259 errln("ToChars single len 
does not match"); 1260 rc = 3; 1261 } 1262 if (!equals(chars, unistr)) { 1263 errln("ToChars single does not match"); 1264 printchars(chars); 1265 rc = 4; 1266 } 1267 } catch (Exception e) { 1268 errln("ToChars - exception in single"); 1269 //e.printStackTrace(); 1270 rc = 6; 1271 } 1272 1273 /* Convert the buffer one at a time to Unicode */ 1274 try { 1275 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1276 decoder.reset(); 1277 byes.rewind(); 1278 for (int i = 1; i <= byteStr.length; i++) { 1279 byes.limit(i); 1280 CoderResult result = decoder.decode(byes, chars, false); 1281 if (result.isError()) { 1282 errln("Error while decoding: "+result.toString()); 1283 } 1284 if(result.isOverflow()){ 1285 errln("ToChars Simple threw an overflow exception"); 1286 } 1287 } 1288 if (chars.limit() != unistr.length()) { 1289 errln("ToChars Simple buffer len does not match"); 1290 rc = 7; 1291 } 1292 if (!equals(chars, unistr)) { 1293 errln("ToChars Simple buffer does not match"); 1294 printchars(chars); 1295 err(" Expected : "); 1296 printchars(unistr); 1297 rc = 8; 1298 } 1299 } catch (Exception e) { 1300 errln("ToChars - exception in single buffer"); 1301 //e.printStackTrace(System.err); 1302 rc = 9; 1303 } 1304 if (rc != 0) { 1305 errln("Test Simple ToChars for encoding : FAILED"); 1306 } 1307 1308 rc = 0; 1309 /* Convert the whole buffer from unicode */ 1310 try { 1311 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1312 encoder.reset(); 1313 CoderResult result = encoder.encode(uniVal, bytes, false); 1314 if (result.isError()) { 1315 errln("FromChars reported error: " + result.toString()); 1316 rc = 1; 1317 } 1318 if(result.isOverflow()){ 1319 errln("FromChars threw an overflow exception"); 1320 } 1321 bytes.position(0); 1322 if (!bytes.equals(expected)) { 1323 errln("FromChars does not match"); 1324 printbytes(bytes); 1325 printbytes(expected); 1326 rc = 2; 1327 } 1328 } catch (Exception e) { 1329 errln("FromChars - exception in buffer"); 1330 
//e.printStackTrace(System.err); 1331 rc = 5; 1332 } 1333 1334 /* Convert the buffer one char at a time to unicode */ 1335 try { 1336 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1337 CharBuffer c = CharBuffer.wrap(unistr); 1338 encoder.reset(); 1339 CoderResult result= null; 1340 for (int i = 1; i <= unistr.length(); i++) { 1341 c.limit(i); 1342 result = encoder.encode(c, bytes, false); 1343 if(result.isOverflow()){ 1344 errln("FromChars single threw an overflow exception"); 1345 } 1346 if(result.isError()){ 1347 errln("FromChars single threw an error: "+ result.toString()); 1348 } 1349 } 1350 if (expectedByteStr.length != bytes.limit()) { 1351 errln("FromChars single len does not match"); 1352 rc = 3; 1353 } 1354 1355 bytes.position(0); 1356 if (!bytes.equals(expected)) { 1357 errln("FromChars single does not match"); 1358 printbytes(bytes); 1359 printbytes(expected); 1360 rc = 4; 1361 } 1362 1363 } catch (Exception e) { 1364 errln("FromChars - exception in single"); 1365 //e.printStackTrace(System.err); 1366 rc = 6; 1367 } 1368 1369 /* Convert one char at a time to unicode */ 1370 try { 1371 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1372 encoder.reset(); 1373 char[] temp = unistr.toCharArray(); 1374 CoderResult result=null; 1375 for (int i = 0; i <= temp.length; i++) { 1376 uniVal.limit(i); 1377 result = encoder.encode(uniVal, bytes, false); 1378 if(result.isOverflow()){ 1379 errln("FromChars simple threw an overflow exception"); 1380 } 1381 if(result.isError()){ 1382 errln("FromChars simple threw an error: "+ result.toString()); 1383 } 1384 } 1385 if (bytes.limit() != expectedByteStr.length) { 1386 errln("FromChars Simple len does not match"); 1387 rc = 7; 1388 } 1389 if (!bytes.equals(byes)) { 1390 errln("FromChars Simple does not match"); 1391 printbytes(bytes); 1392 printbytes(byes); 1393 rc = 8; 1394 } 1395 } catch (Exception e) { 1396 errln("FromChars - exception in single buffer"); 1397 
//e.printStackTrace(System.err); 1398 rc = 9; 1399 } 1400 if (rc != 0) { 1401 errln("Test Simple FromChars " + encoding + " --FAILED"); 1402 } 1403 } 1404 printchars(CharBuffer buf)1405 void printchars(CharBuffer buf) { 1406 int i; 1407 char[] chars = new char[buf.limit()]; 1408 //save the current position 1409 int pos = buf.position(); 1410 buf.position(0); 1411 buf.get(chars); 1412 //reset to old position 1413 buf.position(pos); 1414 for (i = 0; i < chars.length; i++) { 1415 err(hex(chars[i]) + " "); 1416 } 1417 errln(""); 1418 } printchars(String str)1419 void printchars(String str) { 1420 char[] chars = str.toCharArray(); 1421 for (int i = 0; i < chars.length; i++) { 1422 err(hex(chars[i]) + " "); 1423 } 1424 errln(""); 1425 } printbytes(ByteBuffer buf)1426 void printbytes(ByteBuffer buf) { 1427 int i; 1428 byte[] bytes = new byte[buf.limit()]; 1429 //save the current position 1430 int pos = buf.position(); 1431 buf.position(0); 1432 buf.get(bytes); 1433 //reset to old position 1434 buf.position(pos); 1435 for (i = 0; i < bytes.length; i++) { 1436 System.out.print(hex(bytes[i]) + " "); 1437 } 1438 errln(""); 1439 } 1440 equals(CoderResult a, CoderResult b)1441 public boolean equals(CoderResult a, CoderResult b) { 1442 return (a.isUnderflow() && b.isUnderflow()) 1443 || (a.isOverflow() && b.isOverflow()) 1444 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1445 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1446 } equals(CharBuffer buf, String str)1447 public boolean equals(CharBuffer buf, String str) { 1448 return equals(buf, str.toCharArray()); 1449 } equals(CharBuffer buf, CharBuffer str)1450 public boolean equals(CharBuffer buf, CharBuffer str) { 1451 if (buf.limit() != str.limit()) 1452 return false; 1453 int limit = buf.limit(); 1454 for (int i = 0; i < limit; i++) 1455 if (buf.get(i) != str.get(i)) 1456 return false; 1457 return true; 1458 } equals(CharBuffer buf, CharBuffer str, int limit)1459 public boolean 
equals(CharBuffer buf, CharBuffer str, int limit) { 1460 if (limit > buf.limit() || limit > str.limit()) 1461 return false; 1462 for (int i = 0; i < limit; i++) 1463 if (buf.get(i) != str.get(i)) 1464 return false; 1465 return true; 1466 } equals(CharBuffer buf, char[] compareTo)1467 public boolean equals(CharBuffer buf, char[] compareTo) { 1468 char[] chars = new char[buf.limit()]; 1469 //save the current position 1470 int pos = buf.position(); 1471 buf.position(0); 1472 buf.get(chars); 1473 //reset to old position 1474 buf.position(pos); 1475 return equals(chars, compareTo); 1476 } 1477 equals(char[] chars, char[] compareTo)1478 public boolean equals(char[] chars, char[] compareTo) { 1479 if (chars.length != compareTo.length) { 1480 errln( 1481 "Length does not match chars: " 1482 + chars.length 1483 + " compareTo: " 1484 + compareTo.length); 1485 return false; 1486 } else { 1487 boolean result = true; 1488 for (int i = 0; i < chars.length; i++) { 1489 if (chars[i] != compareTo[i]) { 1490 logln( 1491 "Got: " 1492 + hex(chars[i]) 1493 + " Expected: " 1494 + hex(compareTo[i]) 1495 + " At: " 1496 + i); 1497 result = false; 1498 } 1499 } 1500 return result; 1501 } 1502 } 1503 equals(ByteBuffer buf, byte[] compareTo)1504 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1505 byte[] chars = new byte[buf.limit()]; 1506 //save the current position 1507 int pos = buf.position(); 1508 buf.position(0); 1509 buf.get(chars); 1510 //reset to old position 1511 buf.position(pos); 1512 return equals(chars, compareTo); 1513 } equals(ByteBuffer buf, ByteBuffer compareTo)1514 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1515 if (buf.limit() != compareTo.limit()) 1516 return false; 1517 int limit = buf.limit(); 1518 for (int i = 0; i < limit; i++) 1519 if (buf.get(i) != compareTo.get(i)) 1520 return false; 1521 return true; 1522 } equals(ByteBuffer buf, ByteBuffer compareTo, int limit)1523 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) 
{ 1524 if (limit > buf.limit() || limit > compareTo.limit()) 1525 return false; 1526 for (int i = 0; i < limit; i++) 1527 if (buf.get(i) != compareTo.get(i)) 1528 return false; 1529 return true; 1530 } equals(byte[] chars, byte[] compareTo)1531 public boolean equals(byte[] chars, byte[] compareTo) { 1532 if (false/*chars.length != compareTo.length*/) { 1533 errln( 1534 "Length does not match chars: " 1535 + chars.length 1536 + " compareTo: " 1537 + compareTo.length); 1538 return false; 1539 } else { 1540 boolean result = true; 1541 for (int i = 0; i < chars.length; i++) { 1542 if (chars[i] != compareTo[i]) { 1543 logln( 1544 "Got: " 1545 + hex(chars[i]) 1546 + " Expected: " 1547 + hex(compareTo[i]) 1548 + " At: " 1549 + i); 1550 result = false; 1551 } 1552 } 1553 return result; 1554 } 1555 } 1556 1557 // TODO 1558 /* 1559 @Test 1560 public void TestCallback(String encoding) throws Exception { 1561 1562 byte[] gbSource = 1563 { 1564 (byte) 0x81, 1565 (byte) 0x36, 1566 (byte) 0xDE, 1567 (byte) 0x36, 1568 (byte) 0x81, 1569 (byte) 0x36, 1570 (byte) 0xDE, 1571 (byte) 0x37, 1572 (byte) 0x81, 1573 (byte) 0x36, 1574 (byte) 0xDE, 1575 (byte) 0x38, 1576 (byte) 0xe3, 1577 (byte) 0x32, 1578 (byte) 0x9a, 1579 (byte) 0x36 }; 1580 1581 char[] subChars = { 'P', 'I' }; 1582 1583 decoder.reset(); 1584 1585 decoder.replaceWith(new String(subChars)); 1586 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1587 CharBuffer myTarget = CharBuffer.allocate(5); 1588 1589 decoder.decode(mySource, myTarget, true); 1590 char[] expectedResult = 1591 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1592 1593 if (!equals(myTarget, new String(expectedResult))) { 1594 errln("Test callback GB18030 to Unicode : FAILED"); 1595 } 1596 1597 } 1598 */ 1599 1600 @Test TestCanConvert( )1601 public void TestCanConvert(/*String encoding*/)throws Exception { 1602 char[] mySource = { 1603 '\ud800', '\udc00',/*surrogate pair */ 1604 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1605 
'\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1606 '\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1607 '\ud800','\udc00',/*surrogate pair */ 1608 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1609 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1610 }; 1611 String encoding = "UTF-16"; 1612 CharsetEncoder encoder = null; 1613 try { 1614 CharsetProviderICU provider = new CharsetProviderICU(); 1615 Charset charset = provider.charsetForName(encoding); 1616 encoder = charset.newEncoder(); 1617 } catch(MissingResourceException ex) { 1618 warnln("Could not load charset data: " + encoding); 1619 return; 1620 } 1621 if (!encoder.canEncode(new String(mySource))) { 1622 errln("Test canConvert() " + encoding + " failed. "+encoder); 1623 } 1624 1625 } 1626 1627 @Test TestAvailableCharsets()1628 public void TestAvailableCharsets() { 1629 SortedMap map = Charset.availableCharsets(); 1630 Set keySet = map.keySet(); 1631 Iterator iter = keySet.iterator(); 1632 while(iter.hasNext()){ 1633 logln("Charset name: "+iter.next().toString()); 1634 } 1635 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1636 int mapSize = map.size(); 1637 if(mapSize < charsets.length){ 1638 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1639 + " JDK: " + mapSize); 1640 } 1641 logln("Total Number of chasets = " + map.size()); 1642 } 1643 1644 @Test TestWindows936()1645 public void TestWindows936(){ 1646 CharsetProviderICU icu = new CharsetProviderICU(); 1647 Charset cs = icu.charsetForName("windows-936-2000"); 1648 String canonicalName = cs.name(); 1649 if(!canonicalName.equals("GBK")){ 1650 errln("Did not get the expected canonical name. 
Got: "+canonicalName); //get the canonical name 1651 } 1652 } 1653 1654 @Test TestICUAvailableCharsets()1655 public void TestICUAvailableCharsets() { 1656 CharsetProviderICU icu = new CharsetProviderICU(); 1657 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1658 for(int i=0;i<charsets.length;i++){ 1659 Charset cs = icu.charsetForName((String)charsets[i]); 1660 try{ 1661 CharsetEncoder encoder = cs.newEncoder(); 1662 if(encoder!=null){ 1663 logln("Creation of encoder succeeded. "+cs.toString()); 1664 } 1665 }catch(Exception ex){ 1666 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1667 } 1668 try{ 1669 CharsetDecoder decoder = cs.newDecoder(); 1670 if(decoder!=null){ 1671 logln("Creation of decoder succeeded. "+cs.toString()); 1672 } 1673 }catch(Exception ex){ 1674 errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString()); 1675 } 1676 } 1677 } 1678 1679 /* jitterbug 4312 */ 1680 @Test TestUnsupportedCharset()1681 public void TestUnsupportedCharset(){ 1682 CharsetProvider icu = new CharsetProviderICU(); 1683 Charset icuChar = icu.charsetForName("impossible"); 1684 if(icuChar != null){ 1685 errln("ICU does not conform to the spec"); 1686 } 1687 } 1688 1689 @Test TestEncoderCreation()1690 public void TestEncoderCreation(){ 1691 // Use CharsetICU.forNameICU() so that we get the ICU version 1692 // even if the system or another provider also supports this charset. 
1693 String encoding = "GB_2312-80"; 1694 try{ 1695 Charset cs = CharsetICU.forNameICU(encoding); 1696 CharsetEncoder enc = cs.newEncoder(); 1697 if(enc!=null){ 1698 logln("Successfully created an encoder for " + encoding + ": " + enc); 1699 if(!(enc instanceof CharsetEncoderICU)) { 1700 errln("Expected " + encoding + 1701 " to be implemented by ICU but got an instance of " + enc.getClass()); 1702 } 1703 }else{ 1704 errln("Error creating charset encoder for " + encoding); 1705 } 1706 }catch(Exception e){ 1707 warnln("Error creating charset encoder for " + encoding + ": " + e); 1708 } 1709 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1710 encoding = "x-ibm-971_P100-1995"; 1711 try{ 1712 Charset cs = Charset.forName(encoding); 1713 CharsetEncoder enc = cs.newEncoder(); 1714 if(enc!=null){ 1715 logln("Successfully created an encoder for " + encoding + ": " + enc + 1716 " which is implemented by ICU? " + (enc instanceof CharsetEncoderICU)); 1717 }else{ 1718 errln("Error creating charset encoder for " + encoding); 1719 } 1720 }catch(Exception e){ 1721 warnln("Error creating charset encoder for " + encoding + ": " + e); 1722 } 1723 } 1724 1725 @Test TestSubBytes()1726 public void TestSubBytes(){ 1727 try{ 1728 //create utf-8 decoder 1729 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1730 1731 //create a valid byte array, which can be decoded to " buffer" 1732 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1733 1734 ByteBuffer buffer = ByteBuffer.allocate(20); 1735 1736 //add a evil byte to make the byte buffer be malformed input 1737 buffer.put((byte)0xd8); 1738 1739 //put the valid byte array 1740 buffer.put(unibytes); 1741 1742 //reset postion 1743 buffer.flip(); 1744 1745 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1746 CharBuffer out = decoder.decode(buffer); 1747 String expected = "\ufffd buffer"; 1748 if(!expected.equals(new 
String(out.array()))){ 1749 errln("Did not get the expected result for substitution chars. Got: "+ 1750 new String(out.array()) + "("+ hex(out.array())+")"); 1751 } 1752 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1753 }catch (CharacterCodingException ex){ 1754 errln("Unexpected exception: "+ex.toString()); 1755 } 1756 } 1757 /* 1758 1759 @Test 1760 public void TestImplFlushFailure(){ 1761 1762 try{ 1763 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1764 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1765 ByteBuffer out = ByteBuffer.allocate(30); 1766 encoder.encode(in, out, true); 1767 encoder.flush(out); 1768 if(out.position()!= 20){ 1769 errln("Did not get the expected position from flush"); 1770 } 1771 1772 }catch (Exception ex){ 1773 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1774 } 1775 } 1776 */ 1777 1778 @Test TestISO88591()1779 public void TestISO88591() { 1780 1781 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1782 if(cs!=null){ 1783 CharsetEncoder encoder = cs.newEncoder(); 1784 if(encoder!=null){ 1785 encoder.canEncode("\uc2a3"); 1786 }else{ 1787 errln("Could not create encoder for iso-8859-1"); 1788 } 1789 }else{ 1790 errln("Could not create Charset for iso-8859-1"); 1791 } 1792 1793 } 1794 1795 @Test TestUTF8Encode()1796 public void TestUTF8Encode() { 1797 // Test with a lead surrogate in the middle of the input text. 1798 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 
1799 CharBuffer in = CharBuffer.wrap("\ud800a"); 1800 ByteBuffer out = ByteBuffer.allocate(30); 1801 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1802 CoderResult result = encoderICU.encode(in, out, true); 1803 1804 if (result.isMalformed()) { 1805 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1806 } else if (result.isUnderflow()) { 1807 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1808 } 1809 1810 in.position(0); 1811 out.clear(); 1812 1813 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1814 result = encoderJDK.encode(in, out, true); 1815 if (result.isMalformed()) { 1816 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1817 } else if (result.isUnderflow()) { 1818 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1819 } 1820 } 1821 1822 /* private void printCB(CharBuffer buf){ 1823 buf.rewind(); 1824 while(buf.hasRemaining()){ 1825 System.out.println(hex(buf.get())); 1826 } 1827 buf.rewind(); 1828 } 1829 */ 1830 1831 @Test TestUTF8()1832 public void TestUTF8() throws CharacterCodingException{ 1833 try{ 1834 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1835 encoderICU.encode(CharBuffer.wrap("\ud800")); 1836 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1837 }catch (Exception e) { 1838 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1839 //e.printStackTrace(); 1840 } 1841 1842 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1843 try { 1844 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1845 errln("\\ud800 is OK for JDK utf-8 encoder"); 1846 } catch (Exception e) { 1847 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1848 //e.printStackTrace(); 1849 } 1850 } 1851 1852 @Test TestUTF16Bom()1853 public void TestUTF16Bom(){ 1854 1855 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1856 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1857 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 
}; 1858 CharBuffer inBuf = CharBuffer.allocate(in.length); 1859 inBuf.put(in); 1860 CharsetEncoder encoder = cs.newEncoder(); 1861 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1862 inBuf.rewind(); 1863 encoder.encode(inBuf, outBuf, true); 1864 outBuf.rewind(); 1865 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1866 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1867 } 1868 while(outBuf.hasRemaining()){ 1869 logln("0x"+hex(outBuf.get())); 1870 } 1871 CharsetDecoder decoder = cs.newDecoder(); 1872 outBuf.rewind(); 1873 CharBuffer rt = CharBuffer.allocate(in.length); 1874 CoderResult cr = decoder.decode(outBuf, rt, true); 1875 if(cr.isError()){ 1876 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1877 } 1878 equals(rt, in); 1879 { 1880 rt.clear(); 1881 outBuf.rewind(); 1882 Charset utf16 = Charset.forName("UTF-16"); 1883 CharsetDecoder dc = utf16.newDecoder(); 1884 cr = dc.decode(outBuf, rt, true); 1885 equals(rt, in); 1886 } 1887 } 1888 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush)1889 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1890 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1891 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1892 } 1893 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray)1894 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1895 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1896 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1897 } 1898 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, 
/**
 * Decode harness: copies {@code source} into private working buffers and runs
 * {@code decoder} over them in up to three phases, comparing each result with
 * {@code target} through the {@code equals(CharBuffer, CharBuffer, int)} helper.
 * Only the first phase runs when {@code isQuick()} returns true.
 *
 * @param decoder        decoder under test; it is reset before every phase
 * @param encoding       charset name, used only in failure messages
 * @param source         bytes to decode, read between its position and limit
 * @param target         expected characters; its position is set to 0 by this method
 * @param throwException when true, an error or overflow result is thrown instead of
 *                       being reported with {@code errln}
 * @param flush          when true, phase one also calls {@code decoder.flush}
 * @param backedByArray  when true the working buffers come from {@code allocate};
 *                       otherwise from {@code allocateDirect}
 * @param targetLimit    passed through to the {@code equals} helper
 * @throws BufferOverflowException if a decode call overflows and {@code throwException} is true
 * @throws Exception if phase one ends in an error result and {@code throwException} is true
 */
private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target,
        boolean throwException, boolean flush, boolean backedByArray, int targetLimit)
        throws BufferOverflowException, Exception {
    ByteBuffer mySource;
    CharBuffer myTarget;
    if (backedByArray) {
        mySource = ByteBuffer.allocate(source.capacity());
        myTarget = CharBuffer.allocate(target.capacity());
    } else {
        // this does not guarantee by any means that mySource and myTarget
        // are not backed by arrays
        mySource = ByteBuffer.allocateDirect(source.capacity());
        myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer();
    }
    // Copy the readable part of source with absolute puts, so source itself is not advanced.
    mySource.position(source.position());
    for (int i = source.position(); i < source.limit(); i++)
        mySource.put(i, source.get(i));

    // Phase 1: decode the complete input in a single call.
    {
        decoder.reset();
        myTarget.limit(target.limit());
        mySource.limit(source.limit());
        mySource.position(source.position());
        CoderResult result = CoderResult.UNDERFLOW;
        result = decoder.decode(mySource, myTarget, true);
        if (flush) {
            result = decoder.flush(myTarget);
        }
        if (result.isError()) {
            if (throwException) {
                throw new Exception();
            }
            errln("Test complete buffers while decoding failed. " + result.toString());
            return;
        }
        if (result.isOverflow()) {
            if (throwException) {
                throw new BufferOverflowException();
            }
            errln("Test complete buffers while decoding threw overflow exception");
            return;
        }
        // Flip the output by hand so it can be compared from index 0.
        myTarget.limit(myTarget.position());
        myTarget.position(0);
        target.position(0);
        if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) {
            errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed");
        }
    }
    if (isQuick()) {
        return;
    }
    // Phase 2: feed the input by raising the source limit one byte per decode call.
    {
        decoder.reset();
        myTarget.limit(target.position());
        mySource.limit(source.position());
        mySource.position(source.position());
        myTarget.clear();
        myTarget.position(0);

        // NOTE(review): the limit and the position of mySource were both just set to
        // source.position(), so remaining() is 0 here and the loop below does not
        // execute - confirm whether source.limit() was intended.
        int inputLen = mySource.remaining();

        CoderResult result = CoderResult.UNDERFLOW;
        for (int i = 1; i <= inputLen; i++) {
            mySource.limit(i);
            if (i == inputLen) {
                result = decoder.decode(mySource, myTarget, true);
            } else {
                result = decoder.decode(mySource, myTarget, false);
            }
            if (result.isError()) {
                errln("Test small input buffers while decoding failed. " + result.toString());
                break;
            }
            if (result.isOverflow()) {
                if (throwException) {
                    throw new BufferOverflowException();
                }
                errln("Test small input buffers while decoding threw overflow exception");
                break;
            }

        }
        if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) {
            errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed");
        }
    }
    // Phase 3: grow the source limit on underflow and the target limit on overflow.
    {
        decoder.reset();
        myTarget.limit(0);
        mySource.limit(0);
        mySource.position(source.position());
        // NOTE(review): clear() resets the limit to the capacity, which undoes the
        // limit(0) set above - confirm whether the target was meant to start empty.
        myTarget.clear();
        while (true) {
            CoderResult result = decoder.decode(mySource, myTarget, false);
            if (result.isUnderflow()) {
                if (mySource.limit() < source.limit())
                    mySource.limit(mySource.limit() + 1);
            } else if (result.isOverflow()) {
                if (myTarget.limit() < target.limit())
                    myTarget.limit(myTarget.limit() + 1);
                else
                    break;
            } else /*if (result.isError())*/ {
                errln("Test small output buffers while decoding " + result.toString());
            }
            // All available input consumed: signal end of input, flush, and stop.
            if (mySource.position() == mySource.limit()) {
                result = decoder.decode(mySource, myTarget, true);
                if (result.isError()) {
                    errln("Test small output buffers while decoding " + result.toString());
                }
                result = decoder.flush(myTarget);
                if (result.isError()) {
                    errln("Test small output buffers while decoding " + result.toString());
                }
                break;
            }
        }

        if (!equals(myTarget, target, targetLimit)) {
            errln("Test small output buffers " + encoding + " TO Unicode failed");
        }
    }
}
target, throwException, flush, true); 2027 } 2028 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray)2029 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2030 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2031 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2032 } 2033 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)2034 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2035 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2036 BufferOverflowException { 2037 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2038 2039 CharBuffer mySource; 2040 ByteBuffer myTarget; 2041 if (backedByArray) { 2042 mySource = CharBuffer.allocate(source.capacity()); 2043 myTarget = ByteBuffer.allocate(target.capacity()); 2044 } else { 2045 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2046 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2047 } 2048 mySource.position(source.position()); 2049 for (int i = source.position(); i < source.limit(); i++) 2050 mySource.put(i, source.get(i)); 2051 2052 myTarget.clear(); 2053 { 2054 logln("Running tests on small input buffers for " + encoding); 2055 encoder.reset(); 2056 myTarget.limit(target.limit()); 2057 mySource.limit(source.limit()); 2058 mySource.position(source.position()); 2059 CoderResult result = null; 2060 2061 result = encoder.encode(mySource, myTarget, true); 2062 if (flush) { 2063 result = encoder.flush(myTarget); 2064 } 2065 2066 if (result.isError()) { 2067 if (throwException) { 2068 throw new Exception(); 
2069 } 2070 errln("Test complete while encoding failed. " + result.toString()); 2071 } 2072 if (result.isOverflow()) { 2073 if (throwException) { 2074 throw new BufferOverflowException(); 2075 } 2076 errln("Test complete while encoding threw overflow exception"); 2077 } 2078 if (!equals(myTarget, target, targetLimit)) { 2079 errln("Test complete buffers while encoding for " + encoding + " failed"); 2080 2081 } else { 2082 logln("Tests complete buffers for " + encoding + " passed"); 2083 } 2084 } 2085 if (isQuick()) { 2086 return; 2087 } 2088 { 2089 logln("Running tests on small input buffers for " + encoding); 2090 encoder.reset(); 2091 myTarget.clear(); 2092 myTarget.limit(target.limit()); 2093 mySource.limit(source.limit()); 2094 mySource.position(source.position()); 2095 int inputLen = mySource.limit(); 2096 CoderResult result = null; 2097 for (int i = 1; i <= inputLen; i++) { 2098 mySource.limit(i); 2099 result = encoder.encode(mySource, myTarget, false); 2100 if (result.isError()) { 2101 errln("Test small input buffers while encoding failed. 
" + result.toString()); 2102 } 2103 if (result.isOverflow()) { 2104 if (throwException) { 2105 throw new BufferOverflowException(); 2106 } 2107 errln("Test small input buffers while encoding threw overflow exception"); 2108 } 2109 } 2110 if (!equals(myTarget, target, targetLimit)) { 2111 errln("Test small input buffers " + encoding + " From Unicode failed"); 2112 } else { 2113 logln("Tests on small input buffers for " + encoding + " passed"); 2114 } 2115 } 2116 { 2117 logln("Running tests on small output buffers for " + encoding); 2118 encoder.reset(); 2119 myTarget.clear(); 2120 myTarget.limit(target.limit()); 2121 mySource.limit(source.limit()); 2122 mySource.position(source.position()); 2123 mySource.position(0); 2124 myTarget.position(0); 2125 2126 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2127 2128 while (true) { 2129 int pos = myTarget.position(); 2130 2131 CoderResult result = encoder.encode(mySource, myTarget, false); 2132 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2133 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2134 2135 if (result.isError()) { 2136 errln("Test small output buffers while encoding " + result.toString()); 2137 } 2138 if (mySource.position() == mySource.limit()) { 2139 result = encoder.encode(mySource, myTarget, true); 2140 if (result.isError()) { 2141 errln("Test small output buffers while encoding " + result.toString()); 2142 } 2143 2144 myTarget.limit(myTarget.capacity()); 2145 result = encoder.flush(myTarget); 2146 if (result.isError()) { 2147 errln("Test small output buffers while encoding " + result.toString()); 2148 } 2149 break; 2150 } 2151 } 2152 if (!equals(myTarget, target, targetLimit)) { 2153 errln("Test small output buffers " + encoding + " From Unicode failed."); 2154 } 2155 logln("Tests on small output buffers for " + encoding + " passed"); 2156 } 2157 } 2158 2159 2160 //TODO 2161 /* 2162 @Test 
2163 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2164 try { 2165 { 2166 String source = uSource.toString(); 2167 byte[] target = source.getBytes(m_encoding); 2168 if (!equals(target, bSource.array())) { 2169 errln("encode using string API failed"); 2170 } 2171 } 2172 { 2173 2174 String target = new String(bSource.array(), m_encoding); 2175 if (!equals(uSource, target.toCharArray())) { 2176 errln("decode using string API failed"); 2177 } 2178 } 2179 } catch (Exception e) { 2180 //e.printStackTrace(); 2181 errln(e.getMessage()); 2182 } 2183 } 2184 2185 /*private void fromUnicodeTest() throws Exception { 2186 2187 logln("Loaded Charset: " + charset.getClass().toString()); 2188 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2189 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2190 2191 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2192 logln("Created ByteBuffer of length: " + uSource.length); 2193 CharBuffer mySource = CharBuffer.wrap(uSource); 2194 logln("Wrapped ByteBuffer with CharBuffer "); 2195 encoder.reset(); 2196 logln("Test Unicode to " + encoding ); 2197 encoder.encode(mySource, myTarget, true); 2198 if (!equals(myTarget, gbSource)) { 2199 errln("--Test Unicode to " + encoding + ": FAILED"); 2200 } 2201 logln("Test Unicode to " + encoding +" passed"); 2202 } 2203 2204 @Test 2205 public void TestToUnicode( ) throws Exception { 2206 2207 logln("Loaded Charset: " + charset.getClass().toString()); 2208 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2209 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2210 2211 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2212 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2213 decoder.reset(); 2214 CoderResult result = decoder.decode(mySource, myTarget, true); 2215 if (result.isError()) { 2216 errln("Test ToUnicode -- FAILED"); 2217 } 2218 if (!equals(myTarget, uSource)) { 
2219 errln("--Test " + encoding + " to Unicode :FAILED"); 2220 } 2221 } 2222 2223 public static byte[] getByteArray(char[] source) { 2224 byte[] target = new byte[source.length]; 2225 int i = source.length; 2226 for (; --i >= 0;) { 2227 target[i] = (byte) source[i]; 2228 } 2229 return target; 2230 } 2231 /* 2232 private void smBufCharset(Charset charset) { 2233 try { 2234 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2235 CharBuffer uTarget = 2236 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2237 2238 if (!equals(uTarget, uSource)) { 2239 errln("Test " + charset.toString() + " to Unicode :FAILED"); 2240 } 2241 if (!equals(bTarget, gbSource)) { 2242 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2243 } 2244 } catch (Exception ex) { 2245 errln("Encountered exception in smBufCharset"); 2246 } 2247 } 2248 2249 @Test 2250 public void TestMultithreaded() throws Exception { 2251 final Charset cs = Charset.forName(encoding); 2252 if (cs == charset) { 2253 errln("The objects are equal"); 2254 } 2255 smBufCharset(cs); 2256 try { 2257 final Thread t1 = new Thread() { 2258 public void run() { 2259 // commented out since the mehtods on 2260 // Charset API are supposed to be thread 2261 // safe ... 
to test it we dont sync 2262 2263 // synchronized(charset){ 2264 while (!interrupted()) { 2265 try { 2266 smBufCharset(cs); 2267 } catch (UnsupportedCharsetException ueEx) { 2268 errln(ueEx.toString()); 2269 } 2270 } 2271 2272 // } 2273 } 2274 }; 2275 final Thread t2 = new Thread() { 2276 public void run() { 2277 // synchronized(charset){ 2278 while (!interrupted()) { 2279 try { 2280 smBufCharset(cs); 2281 } catch (UnsupportedCharsetException ueEx) { 2282 errln(ueEx.toString()); 2283 } 2284 } 2285 2286 //} 2287 } 2288 }; 2289 t1.start(); 2290 t2.start(); 2291 int i = 0; 2292 for (;;) { 2293 if (i > 1000000000) { 2294 try { 2295 t1.interrupt(); 2296 } catch (Exception e) { 2297 } 2298 try { 2299 t2.interrupt(); 2300 } catch (Exception e) { 2301 } 2302 break; 2303 } 2304 i++; 2305 } 2306 } catch (Exception e) { 2307 throw e; 2308 } 2309 } 2310 2311 @Test 2312 public void TestSynchronizedMultithreaded() throws Exception { 2313 // Methods on CharsetDecoder and CharsetEncoder classes 2314 // are inherently unsafe if accessed by multiple concurrent 2315 // thread so we synchronize them 2316 final Charset charset = Charset.forName(encoding); 2317 final CharsetDecoder decoder = charset.newDecoder(); 2318 final CharsetEncoder encoder = charset.newEncoder(); 2319 try { 2320 final Thread t1 = new Thread() { 2321 public void run() { 2322 while (!interrupted()) { 2323 try { 2324 synchronized (encoder) { 2325 smBufEncode(encoder, encoding); 2326 } 2327 synchronized (decoder) { 2328 smBufDecode(decoder, encoding); 2329 } 2330 } catch (UnsupportedCharsetException ueEx) { 2331 errln(ueEx.toString()); 2332 } 2333 } 2334 2335 } 2336 }; 2337 final Thread t2 = new Thread() { 2338 public void run() { 2339 while (!interrupted()) { 2340 try { 2341 synchronized (encoder) { 2342 smBufEncode(encoder, encoding); 2343 } 2344 synchronized (decoder) { 2345 smBufDecode(decoder, encoding); 2346 } 2347 } catch (UnsupportedCharsetException ueEx) { 2348 errln(ueEx.toString()); 2349 } 2350 } 2351 } 
2352 }; 2353 t1.start(); 2354 t2.start(); 2355 int i = 0; 2356 for (;;) { 2357 if (i > 1000000000) { 2358 try { 2359 t1.interrupt(); 2360 } catch (Exception e) { 2361 } 2362 try { 2363 t2.interrupt(); 2364 } catch (Exception e) { 2365 } 2366 break; 2367 } 2368 i++; 2369 } 2370 } catch (Exception e) { 2371 throw e; 2372 } 2373 } 2374 */ 2375 2376 @Test TestMBCS()2377 public void TestMBCS(){ 2378 { 2379 // Encoder: from Unicode conversion 2380 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2381 ByteBuffer out = ByteBuffer.allocate(6); 2382 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2383 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2384 if(!result.isError()){ 2385 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2386 if(!equals(expected, out.array())){ 2387 // Android patch: Skip tests that fail with customized data. 2388 logln("Did not get the expected result for substitution bytes. Got: "+ 2389 hex(out.array())); 2390 // Android patch end. 2391 } 2392 logln("Output: "+ hex(out.array())); 2393 }else{ 2394 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2395 } 2396 } 2397 { 2398 // Decoder: to Unicode conversion 2399 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2400 CharBuffer out = CharBuffer.allocate(3); 2401 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2402 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2403 if(!result.isError()){ 2404 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2405 if(!equals(expected, out.array())){ 2406 errln("Did not get the expected result for substitution chars. 
Got: "+ 2407 hex(out.array())); 2408 } 2409 logln("Output: "+ hex(out.array())); 2410 }else{ 2411 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2412 } 2413 } 2414 } 2415 2416 @Test TestJB4897()2417 public void TestJB4897(){ 2418 CharsetProviderICU provider = new CharsetProviderICU(); 2419 Charset charset = provider.charsetForName("x-abracadabra"); 2420 if(charset!=null && charset.canEncode()== true){ 2421 errln("provider.charsetForName() does not validate the charset names" ); 2422 } 2423 } 2424 2425 @Test TestJB5027()2426 public void TestJB5027() { 2427 CharsetProviderICU provider= new CharsetProviderICU(); 2428 2429 Charset fake = provider.charsetForName("doesNotExist"); 2430 if(fake != null){ 2431 errln("\"doesNotExist\" returned " + fake); 2432 } 2433 Charset xfake = provider.charsetForName("x-doesNotExist"); 2434 if(xfake!=null){ 2435 errln("\"x-doesNotExist\" returned " + xfake); 2436 } 2437 } 2438 2439 //test to make sure that number of aliases and canonical names are in the charsets that are in 2440 @Test TestAllNames()2441 public void TestAllNames() { 2442 2443 CharsetProviderICU provider= new CharsetProviderICU(); 2444 Object[] available = CharsetProviderICU.getAvailableNames(); 2445 for(int i=0; i<available.length;i++){ 2446 try{ 2447 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2448 2449 // ',' is not allowed by Java's charset name checker 2450 if(canon.indexOf(',')>=0){ 2451 continue; 2452 } 2453 Charset cs = provider.charsetForName((String)available[i]); 2454 2455 Object[] javaAliases = cs.aliases().toArray(); 2456 //seach for ICU canonical name in javaAliases 2457 boolean inAliasList = false; 2458 for(int j=0; j<javaAliases.length; j++){ 2459 String java = (String) javaAliases[j]; 2460 if(java.equals(canon)){ 2461 logln("javaAlias: " + java + " canon: " + canon); 2462 inAliasList = true; 2463 } 2464 } 2465 if(inAliasList == false){ 2466 errln("Could not find ICU canonical name: "+canon+ " for 
java canonical name: "+ available[i]+ " "+ i); 2467 } 2468 }catch(UnsupportedCharsetException ex){ 2469 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2470 continue; 2471 } 2472 } 2473 } 2474 2475 @Test TestDecoderImplFlush()2476 public void TestDecoderImplFlush() { 2477 CharsetProviderICU provider = new CharsetProviderICU(); 2478 Charset ics = provider.charsetForName("UTF-16"); 2479 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2480 execDecoder(jcs); 2481 execDecoder(ics); 2482 } 2483 2484 @Test TestEncoderImplFlush()2485 public void TestEncoderImplFlush() { 2486 CharsetProviderICU provider = new CharsetProviderICU(); 2487 Charset ics = provider.charsetForName("UTF-16"); 2488 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2489 execEncoder(jcs); 2490 execEncoder(ics); 2491 } execDecoder(Charset cs)2492 private void execDecoder(Charset cs){ 2493 CharsetDecoder decoder = cs.newDecoder(); 2494 decoder.onMalformedInput(CodingErrorAction.REPORT); 2495 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2496 CharBuffer out = CharBuffer.allocate(10); 2497 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2498 -2, 32, 0, 98 }), out, false); 2499 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2500 2501 logln(cs.getClass().toString()+ ":" +result.toString()); 2502 try { 2503 result = decoder.flush(out); 2504 logln(cs.getClass().toString()+ ":" +result.toString()); 2505 } catch (Exception e) { 2506 errln(e.getMessage()+" "+cs.getClass().toString()); 2507 } 2508 } execEncoder(Charset cs)2509 private void execEncoder(Charset cs){ 2510 CharsetEncoder encoder = cs.newEncoder(); 2511 encoder.onMalformedInput(CodingErrorAction.REPORT); 2512 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2513 ByteBuffer out = ByteBuffer.allocate(10); 2514 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2515 '\u2345', 32, 98 }), out, false); 2516 
logln(cs.getClass().toString()+ ":" +result.toString()); 2517 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2518 2519 logln(cs.getClass().toString()+ ":" +result.toString()); 2520 try { 2521 result = encoder.flush(out); 2522 logln(cs.getClass().toString()+ ":" +result.toString()); 2523 } catch (Exception e) { 2524 errln(e.getMessage()+" "+cs.getClass().toString()); 2525 } 2526 } 2527 2528 @Test TestDecodeMalformed()2529 public void TestDecodeMalformed() { 2530 CharsetProviderICU provider = new CharsetProviderICU(); 2531 Charset ics = provider.charsetForName("UTF-16BE"); 2532 //Use SUN's charset 2533 Charset jcs = Charset.forName("UTF-16"); 2534 CoderResult ir = execMalformed(ics); 2535 CoderResult jr = execMalformed(jcs); 2536 if(ir!=jr){ 2537 errln("ICU's decoder did not return the same result as Sun. ICU: "+ir.toString()+" Sun: "+jr.toString()); 2538 } 2539 } 2540 execMalformed(Charset cs)2541 private CoderResult execMalformed(Charset cs){ 2542 CharsetDecoder decoder = cs.newDecoder(); 2543 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2544 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2545 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2546 CharBuffer out = CharBuffer.allocate(3); 2547 return decoder.decode(in, out, true); 2548 } 2549 2550 @Test TestJavaUTF16Decoder()2551 public void TestJavaUTF16Decoder(){ 2552 CharsetProviderICU provider = new CharsetProviderICU(); 2553 Charset ics = provider.charsetForName("UTF-16BE"); 2554 //Use SUN's charset 2555 Charset jcs = Charset.forName("UTF-16"); 2556 Exception ie = execConvertAll(ics); 2557 Exception je = execConvertAll(jcs); 2558 if(ie!=je){ 2559 errln("ICU's decoder did not return the same result as Sun. 
ICU: "+ie.toString()+" Sun: "+je.toString()); 2560 } 2561 } execConvertAll(Charset cs)2562 private Exception execConvertAll(Charset cs){ 2563 ByteBuffer in = ByteBuffer.allocate(400); 2564 int i=0; 2565 while(in.position()!=in.capacity()){ 2566 in.put((byte)0xD8); 2567 in.put((byte)i); 2568 in.put((byte)0xDC); 2569 in.put((byte)i); 2570 i++; 2571 } 2572 in.limit(in.position()); 2573 in.position(0); 2574 CharsetDecoder decoder = cs.newDecoder(); 2575 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2576 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2577 try{ 2578 CharBuffer out = decoder.decode(in); 2579 if(out!=null){ 2580 logln(cs.toString()+" encoing succeeded as expected!"); 2581 } 2582 }catch ( Exception ex){ 2583 errln("Did not get expected exception for encoding: "+cs.toString()); 2584 return ex; 2585 } 2586 return null; 2587 } 2588 2589 @Test TestUTF32BOM()2590 public void TestUTF32BOM(){ 2591 2592 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2593 char[] in = new char[] { 0xd800, 0xdc00, 2594 0xd801, 0xdc01, 2595 0xdbff, 0xdfff, 2596 0xd900, 0xdd00, 2597 0x0000, 0x0041, 2598 0x0000, 0x0042, 2599 0x0000, 0x0043}; 2600 2601 CharBuffer inBuf = CharBuffer.allocate(in.length); 2602 inBuf.put(in); 2603 CharsetEncoder encoder = cs.newEncoder(); 2604 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2605 inBuf.rewind(); 2606 encoder.encode(inBuf, outBuf, true); 2607 outBuf.rewind(); 2608 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2609 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2610 errln("The UTF32 encoder did not appended bom. 
Length returned: " + outBuf.remaining()); 2611 } 2612 while(outBuf.hasRemaining()){ 2613 logln("0x"+hex(outBuf.get())); 2614 } 2615 CharsetDecoder decoder = cs.newDecoder(); 2616 outBuf.limit(outBuf.position()); 2617 outBuf.rewind(); 2618 CharBuffer rt = CharBuffer.allocate(in.length); 2619 CoderResult cr = decoder.decode(outBuf, rt, true); 2620 if(cr.isError()){ 2621 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2622 } 2623 equals(rt, in); 2624 try{ 2625 rt.clear(); 2626 outBuf.rewind(); 2627 Charset utf16 = Charset.forName("UTF-32"); 2628 CharsetDecoder dc = utf16.newDecoder(); 2629 cr = dc.decode(outBuf, rt, true); 2630 equals(rt, in); 2631 }catch(UnsupportedCharsetException ex){ 2632 // swallow the expection. 2633 } 2634 } 2635 2636 /* 2637 * Michael Ow 2638 * Modified 070424 2639 */ 2640 /*The following two methods provides the option of exceptions when Decoding 2641 * and Encoding if needed for testing purposes. 2642 */ smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target)2643 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2644 smBufDecode(decoder, encoding, source, target, true); 2645 } smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray)2646 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2647 try { 2648 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2649 } 2650 catch (Exception ex) { 2651 System.out.println("!exception!"); 2652 } 2653 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target)2654 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2655 smBufEncode(encoder, encoding, source, target, true); 2656 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer 
target, boolean backedByArray)2657 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2658 try { 2659 smBufEncode(encoder, encoding, source, target, false, false); 2660 } 2661 catch (Exception ex) { 2662 System.out.println("!exception!"); 2663 } 2664 } 2665 2666 //Test CharsetICUProvider 2667 @Test TestNullCanonicalName()2668 public void TestNullCanonicalName() { 2669 String enc = null; 2670 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2671 2672 if (canonicalName != null) { 2673 errln("getICUCanonicalName return a non-null string for given null string"); 2674 } 2675 } 2676 2677 @Test TestGetAllNames()2678 public void TestGetAllNames() { 2679 String[] names = null; 2680 2681 names = CharsetProviderICU.getAllNames(); 2682 2683 if (names == null) { 2684 errln("getAllNames returned a null string."); 2685 } 2686 } 2687 2688 //Test CharsetICU 2689 @Test TestCharsetContains()2690 public void TestCharsetContains() { 2691 boolean test; 2692 2693 CharsetProvider provider = new CharsetProviderICU(); 2694 Charset cs1 = provider.charsetForName("UTF-32"); 2695 Charset cs2 = null; 2696 2697 test = cs1.contains(cs2); 2698 2699 if (test != false) { 2700 errln("Charset.contains returned true for a null charset."); 2701 } 2702 2703 cs2 = CharsetICU.forNameICU("UTF-32"); 2704 2705 test = cs1.contains(cs2); 2706 2707 if (test != true) { 2708 errln("Charset.contains returned false for an identical charset."); 2709 } 2710 2711 cs2 = provider.charsetForName("UTF-8"); 2712 2713 test = cs1.contains(cs2); 2714 2715 if (test != false) { 2716 errln("Charset.contains returned true for a different charset."); 2717 } 2718 } 2719 2720 @Test TestCharsetICUNullCharsetName()2721 public void TestCharsetICUNullCharsetName() { 2722 String charsetName = null; 2723 2724 try { 2725 CharsetICU.forNameICU(charsetName); 2726 errln("CharsetICU.forName should have thown an exception after getting a null 
charsetName."); 2727 } 2728 catch(Exception ex) { 2729 } 2730 } 2731 2732 //Test CharsetASCII 2733 @Test TestCharsetASCIIOverFlow()2734 public void TestCharsetASCIIOverFlow() { 2735 int byteBufferLimit; 2736 int charBufferLimit; 2737 2738 CharsetProvider provider = new CharsetProviderICU(); 2739 Charset cs = provider.charsetForName("ASCII"); 2740 CharsetEncoder encoder = cs.newEncoder(); 2741 CharsetDecoder decoder = cs.newDecoder(); 2742 2743 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2744 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2745 2746 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2747 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2748 2749 for(int j=0;j<=0x7f; j++){ 2750 charBuffer.put((char)j); 2751 byteBuffer.put((byte)j); 2752 } 2753 2754 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2755 byteBuffer.position(0); 2756 charBuffer.limit(charBufferLimit = charBuffer.position()); 2757 charBuffer.position(0); 2758 2759 //test for overflow 2760 byteBufferTest.limit(byteBufferLimit - 5); 2761 byteBufferTest.position(0); 2762 charBufferTest.limit(charBufferLimit - 5); 2763 charBufferTest.position(0); 2764 try { 2765 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2766 errln("Overflow exception while decoding ASCII should have been thrown."); 2767 } 2768 catch(Exception ex) { 2769 } 2770 try { 2771 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2772 errln("Overflow exception while encoding ASCII should have been thrown."); 2773 } 2774 catch (Exception ex) { 2775 } 2776 2777 // For better code coverage 2778 /* For better code coverage */ 2779 byte byteout[] = { 2780 (byte)0x01 2781 }; 2782 char charin[] = { 2783 (char)0x0001, (char)0x0002 2784 }; 2785 ByteBuffer bb = ByteBuffer.wrap(byteout); 2786 CharBuffer cb = CharBuffer.wrap(charin); 2787 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2788 // which makes code compiled 
for a newer JDK not run on an older one. 2789 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2790 encoder.reset(); 2791 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2792 errln("Overflow error while encoding ASCII should have occurred."); 2793 } 2794 } 2795 2796 //Test CharsetUTF7 2797 @Test TestCharsetUTF7()2798 public void TestCharsetUTF7() { 2799 CoderResult result = CoderResult.UNDERFLOW; 2800 CharsetProvider provider = new CharsetProviderICU(); 2801 Charset cs = provider.charsetForName("UTF-7"); 2802 CharsetEncoder encoder = cs.newEncoder(); 2803 CharsetDecoder decoder = cs.newDecoder(); 2804 2805 CharBuffer us = CharBuffer.allocate(0x100); 2806 ByteBuffer bs = ByteBuffer.allocate(0x100); 2807 2808 /* Unicode : A<not equal to Alpha Lamda>. */ 2809 /* UTF7: AImIDkQ. */ 2810 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2811 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2812 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2813 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2814 2815 bs.limit(bs.position()); 2816 bs.position(0); 2817 us.limit(us.position()); 2818 us.position(0); 2819 2820 smBufDecode(decoder, "UTF-7", bs, us); 2821 smBufEncode(encoder, "UTF-7", us, bs); 2822 2823 /* Testing UTF-7 toUnicode with substitute callbacks */ 2824 { 2825 byte [] bytesTestErrorConsumption = { 2826 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 2827 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2828 2829 }; 2830 char [] unicodeTestErrorConsumption = { 2831 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2832 }; 2833 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2834 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2835 2836 CodingErrorAction savedMal = decoder.malformedInputAction(); 2837 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2838 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2839 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2840 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2841 decoder.onMalformedInput(savedMal); 2842 decoder.onUnmappableCharacter(savedUMap); 2843 } 2844 /* ticket 6151 */ 2845 CharBuffer smallus = CharBuffer.allocate(1); 2846 ByteBuffer bigbs = ByteBuffer.allocate(3); 2847 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2848 bigbs.position(0); 2849 try { 2850 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2851 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2852 } catch (Exception ex) { 2853 } 2854 2855 //The rest of the code in this method is to provide better code coverage 2856 CharBuffer ccus = CharBuffer.allocate(0x10); 2857 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2858 2859 //start of charset decoder code coverage code 2860 //test for accurate illegal and control character checking 2861 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2862 ccus.put((char)0x0000); 2863 2864 ccbs.limit(ccbs.position()); 2865 ccbs.position(0); 2866 ccus.limit(ccus.position()); 2867 ccus.position(0); 2868 2869 try { 2870 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2871 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2872 } 2873 catch 
(Exception ex) { 2874 } 2875 2876 ccbs.clear(); 2877 ccus.clear(); 2878 2879 //test for illegal base64 character 2880 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2881 ccus.put((char)0x0000); 2882 2883 ccbs.limit(ccbs.position()); 2884 ccbs.position(0); 2885 ccus.limit(ccus.position()); 2886 ccus.position(0); 2887 2888 try { 2889 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2890 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2891 } 2892 catch (Exception ex) { 2893 } 2894 2895 ccbs.clear(); 2896 ccus.clear(); 2897 2898 //test for illegal order of the base64 character sequence 2899 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2900 ccus.put((char)0x0000); ccus.put((char)0x0000); 2901 2902 ccbs.limit(ccbs.position()); 2903 ccbs.position(0); 2904 ccus.limit(ccus.position()); 2905 ccus.position(0); 2906 2907 try { 2908 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2909 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2910 } 2911 catch (Exception ex) { 2912 } 2913 2914 ccbs.clear(); 2915 ccus.clear(); 2916 2917 //test for illegal order of the base64 character sequence 2918 ccbs.put((byte)0x2b); ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2919 ccus.put((char)0x0000); 2920 2921 ccbs.limit(ccbs.position()); 2922 ccbs.position(0); 2923 ccus.limit(ccus.position()); 2924 ccus.position(0); 2925 2926 try { 2927 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2928 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2929 } 2930 catch (Exception ex) { 2931 } 2932 2933 ccbs.clear(); 2934 ccus.clear(); 2935 2936 //test for illegal order of the base64 character sequence 2937 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2938 ccus.put((char)0x0000); 2939 2940 ccbs.limit(ccbs.position()); 2941 ccbs.position(0); 2942 ccus.limit(ccus.position()); 
2943 ccus.position(0); 2944 2945 try { 2946 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2947 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2948 } 2949 catch (Exception ex) { 2950 } 2951 2952 ccbs.clear(); 2953 ccus.clear(); 2954 2955 //test for illegal order of the base64 character sequence 2956 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 2957 ccus.put((char)0x0000); 2958 2959 ccbs.limit(ccbs.position()); 2960 ccbs.position(0); 2961 ccus.limit(ccus.position()); 2962 ccus.position(0); 2963 2964 try { 2965 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 2966 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2967 } 2968 catch (Exception ex) { 2969 } 2970 2971 ccbs.clear(); 2972 ccus.clear(); 2973 2974 //test for overflow buffer error 2975 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 2976 2977 ccbs.limit(ccbs.position()); 2978 ccbs.position(0); 2979 ccus.limit(0); 2980 ccus.position(0); 2981 2982 try { 2983 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 2984 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2985 } 2986 catch (Exception ex) { 2987 } 2988 2989 ccbs.clear(); 2990 ccus.clear(); 2991 2992 //test for overflow buffer error 2993 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 2994 2995 ccbs.limit(ccbs.position()); 2996 ccbs.position(0); 2997 ccus.limit(0); 2998 ccus.position(0); 2999 3000 try { 3001 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 3002 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3003 } 3004 catch (Exception ex) { 3005 } 3006 //end of charset decoder code coverage code 3007 3008 //start of charset encoder code coverage code 3009 ccbs.clear(); 3010 ccus.clear(); 3011 //test for overflow buffer error 3012 ccus.put((char)0x002b); 3013 ccbs.put((byte)0x2b); 3014 3015 ccbs.limit(ccbs.position()); 3016 ccbs.position(0); 3017 
ccus.limit(ccus.position()); 3018 ccus.position(0); 3019 3020 try { 3021 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3022 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3023 } 3024 catch (Exception ex) { 3025 } 3026 3027 ccbs.clear(); 3028 ccus.clear(); 3029 3030 //test for overflow buffer error 3031 ccus.put((char)0x002b); ccus.put((char)0x2262); 3032 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3033 3034 ccbs.limit(ccbs.position()); 3035 ccbs.position(0); 3036 ccus.limit(ccus.position()); 3037 ccus.position(0); 3038 3039 try { 3040 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3041 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3042 } 3043 catch (Exception ex) { 3044 } 3045 3046 ccbs.clear(); 3047 ccus.clear(); 3048 3049 //test for overflow buffer error 3050 ccus.put((char)0x2262); ccus.put((char)0x0049); 3051 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3052 ccbs.limit(ccbs.position()); 3053 ccbs.position(0); 3054 ccus.limit(ccus.position()); 3055 ccus.position(0); 3056 3057 try { 3058 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3059 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3060 } 3061 catch (Exception ex) { 3062 } 3063 3064 ccbs.clear(); 3065 ccus.clear(); 3066 3067 //test for overflow buffer error 3068 ccus.put((char)0x2262); ccus.put((char)0x0395); 3069 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3070 ccbs.limit(ccbs.position()); 3071 ccbs.position(0); 3072 ccus.limit(ccus.position()); 3073 ccus.position(0); 3074 3075 try { 3076 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3077 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3078 } 3079 catch (Exception ex) { 3080 } 3081 3082 
ccbs.clear(); 3083 ccus.clear(); 3084 3085 //test for overflow buffer error 3086 ccus.put((char)0x2262); ccus.put((char)0x0395); 3087 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3088 ccbs.limit(ccbs.position()); 3089 ccbs.position(0); 3090 ccus.limit(ccus.position()); 3091 ccus.position(0); 3092 3093 try { 3094 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3095 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3096 } 3097 catch (Exception ex) { 3098 } 3099 3100 ccbs.clear(); 3101 ccus.clear(); 3102 3103 //test for overflow buffer error 3104 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3105 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3106 ccbs.limit(ccbs.position()); 3107 ccbs.position(0); 3108 ccus.limit(ccus.position()); 3109 ccus.position(0); 3110 3111 try { 3112 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3113 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3114 } 3115 catch (Exception ex) { 3116 } 3117 3118 ccbs.clear(); 3119 ccus.clear(); 3120 3121 //test for overflow buffer error 3122 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3123 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3124 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3125 ccbs.limit(ccbs.position()); 3126 ccbs.position(0); 3127 ccus.limit(ccus.position()); 3128 ccus.position(0); 3129 3130 try { 3131 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3132 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3133 } 3134 catch (Exception ex) { 3135 } 3136 3137 ccbs.clear(); 3138 ccus.clear(); 3139 3140 //test for overflow buffer error 3141 
ccus.put((char)0x0049); ccus.put((char)0x0048); 3142 ccbs.put((byte)0x00); 3143 ccbs.limit(ccbs.position()); 3144 ccbs.position(0); 3145 ccus.limit(ccus.position()); 3146 ccus.position(0); 3147 3148 try { 3149 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3150 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3151 } 3152 catch (Exception ex) { 3153 } 3154 3155 ccbs.clear(); 3156 ccus.clear(); 3157 3158 //test for overflow buffer error 3159 ccus.put((char)0x2262); 3160 ccbs.put((byte)0x00); 3161 ccbs.limit(ccbs.position()); 3162 ccbs.position(0); 3163 ccus.limit(ccus.position()); 3164 ccus.position(0); 3165 3166 try { 3167 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3168 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3169 } 3170 catch (Exception ex) { 3171 } 3172 3173 ccbs.clear(); 3174 ccus.clear(); 3175 3176 //test for overflow buffer error 3177 ccus.put((char)0x2262); ccus.put((char)0x0049); 3178 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3179 ccbs.limit(ccbs.position()); 3180 ccbs.position(0); 3181 ccus.limit(ccus.position()); 3182 ccus.position(0); 3183 3184 try { 3185 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, ccbs, true, false); 3186 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3187 } 3188 catch (Exception ex) { 3189 } 3190 3191 ccbs.clear(); 3192 ccus.clear(); 3193 3194 //test for overflow buffer error 3195 ccus.put((char)0x2262); 3196 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3197 3198 ccbs.limit(ccbs.position()); 3199 ccbs.position(0); 3200 ccus.limit(ccus.position()); 3201 ccus.position(0); 3202 try { 3203 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3204 } catch (Exception ex) { 3205 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 
3206 } 3207 3208 ccbs.clear(); 3209 ccus.clear(); 3210 3211 //test for overflow buffer error 3212 encoder.reset(); 3213 ccus.put((char)0x3980); ccus.put((char)0x2715); 3214 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3215 3216 ccbs.limit(ccbs.position()); 3217 ccbs.position(0); 3218 ccus.limit(ccus.position()); 3219 ccus.position(0); 3220 3221 result = encoder.encode(ccus, ccbs, true); 3222 result = encoder.flush(ccbs); 3223 if (!result.isOverflow()) { 3224 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3225 } 3226 //end of charset encoder code coverage code 3227 } 3228 3229 @Test TestBug12956()3230 public void TestBug12956() { 3231 final CharsetProvider provider = new CharsetProviderICU(); 3232 final Charset cs_utf7 = provider.charsetForName("UTF-7"); 3233 final Charset cs_imap = provider.charsetForName("IMAP-mailbox-name"); 3234 final String test = "新"; 3235 final byte[] expected_utf7 = {0x2b, 0x5a, 0x62, 0x41, 0x2d}; 3236 final byte[] expected_imap = {0x26, 0x5a, 0x62, 0x41, 0x2d}; 3237 3238 byte[] bytes = test.getBytes(cs_utf7); 3239 if (!Arrays.equals(bytes, expected_utf7)) { 3240 errln("Incorrect UTF-7 conversion. Got " + new String(bytes) + " but expect " + 3241 new String(expected_utf7)); 3242 } 3243 3244 bytes = test.getBytes(cs_imap); 3245 if (!Arrays.equals(bytes, expected_imap)) { 3246 errln("Incorrect IMAP-mailbox-name conversion. 
Got " + new String(bytes) + 3247 " but expect " + new String(expected_imap)); 3248 } 3249 } 3250 3251 //Test Charset ISCII 3252 @Test TestCharsetISCII()3253 public void TestCharsetISCII() { 3254 CharsetProvider provider = new CharsetProviderICU(); 3255 Charset cs = provider.charsetForName("ISCII,version=0"); 3256 CharsetEncoder encoder = cs.newEncoder(); 3257 CharsetDecoder decoder = cs.newDecoder(); 3258 3259 CharBuffer us = CharBuffer.allocate(0x100); 3260 ByteBuffer bs = ByteBuffer.allocate(0x100); 3261 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3262 3263 //test full range of Devanagari 3264 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3265 us.put((char)0x0908); us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3266 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3267 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3268 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3269 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3270 us.put((char)0x0927); us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3271 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3272 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3273 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3274 us.put((char)0x0941); 
us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3275 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3276 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3277 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3278 3279 bs.put((byte)0xEF); bs.put((byte)0x42); 3280 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3281 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); bs.put((byte)0xAB); bs.put((byte)0xAC); 3282 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3283 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3284 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3285 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3286 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3287 bs.put((byte)0xCB); bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3288 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3289 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3290 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3291 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); 
bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3292 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3293 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3294 3295 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3296 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3297 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3298 bsr.put((byte)0xB3); bsr.put((byte)0xB4); bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3299 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3300 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3301 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3302 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3303 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3304 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3305 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3306 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3307 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); 
bsr.put((byte)0xF4); 3308 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3309 3310 //test Soft Halant 3311 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3312 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3313 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3314 3315 //test explicit halant 3316 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3317 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3318 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3319 3320 //test double danda 3321 us.put((char)0x0965); 3322 bs.put((byte)0xEA); bs.put((byte)0xEA); 3323 bsr.put((byte)0xEA); bsr.put((byte)0xEA); 3324 3325 //test ASCII 3326 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3327 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3328 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3329 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3330 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3331 3332 bs.put((byte)0x1B); bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3333 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3334 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3335 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3336 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); 
bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3337 3338 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3339 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3340 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3341 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3342 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3343 3344 //test from Lotus 3345 //Some of the Lotus ISCII code points have been changed or commented out. 3346 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3347 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3348 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3349 us.put((char)0x095F); 3350 bs.put((byte)0x61); bs.put((byte)0xB3); 3351 bs.put((byte)0x0D); bs.put((byte)0x0A); 3352 bs.put((byte)0xEF); bs.put((byte)0x42); 3353 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3354 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3355 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3356 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3357 bs.put((byte)0xCE); 3358 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); 
bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3359 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3360 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3361 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3362 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3363 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3364 bsr.put((byte)0xCE); 3365 //end of test from Lotus 3366 3367 //tamil range 3368 us.put((char)0x0B86); us.put((char)0x0B87); us.put((char)0x0B88); 3369 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3370 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3371 3372 //telugu range 3373 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3374 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3375 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3376 3377 //kannada range 3378 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3379 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3380 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3381 3382 //test Abbr sign and Anudatta 3383 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3384 us.put((char)0x0961); us.put((char)0x0963); 
us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3385 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3386 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3387 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3388 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3389 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3390 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3391 bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3392 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3393 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3394 //bs.put((byte)0xEF); bs.put((byte)0x30); 3395 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3396 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3397 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3398 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3399 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3400 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); 
bsr.put((byte)0xE9); 3401 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3402 3403 bs.limit(bs.position()); 3404 bs.position(0); 3405 us.limit(us.position()); 3406 us.position(0); 3407 bsr.limit(bsr.position()); 3408 bsr.position(0); 3409 3410 //round trip test 3411 try { 3412 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3413 smBufEncode(encoder, "ISCII-part2", us, bs); 3414 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3415 } catch (Exception ex) { 3416 errln("ISCII round trip test failed."); 3417 } 3418 3419 //Test new characters in the ISCII charset 3420 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3421 decoder = provider.charsetForName("ISCII,version=0").newDecoder(); 3422 char u_pts[] = { 3423 /* DEV */ (char)0x0904, 3424 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3425 }; 3426 byte b_pts[] = { 3427 (byte)0xef, (byte)0x42, 3428 /* DEV */ (byte)0xa4, (byte)0xe0, 3429 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3430 }; 3431 us = CharBuffer.allocate(u_pts.length); 3432 bs = ByteBuffer.allocate(b_pts.length); 3433 us.put(u_pts); 3434 bs.put(b_pts); 3435 3436 bs.limit(bs.position()); 3437 bs.position(0); 3438 us.limit(us.position()); 3439 us.position(0); 3440 3441 try { 3442 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3443 bs.position(0); 3444 us.position(0); 3445 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3446 } catch (Exception ex) { 3447 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3448 } 3449 3450 //The rest of the code in this method is to provide better code coverage 3451 CharBuffer ccus = CharBuffer.allocate(0x10); 3452 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3453 3454 //start of charset decoder code coverage code 3455 //test overflow buffer 3456 ccbs.put((byte)0x49); 3457 3458 ccbs.limit(ccbs.position()); 3459 
ccbs.position(0); 3460 ccus.limit(0); 3461 ccus.position(0); 3462 3463 try { 3464 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3465 errln("Exception while decoding ISCII should have been thrown."); 3466 } 3467 catch (Exception ex) { 3468 } 3469 3470 ccbs.clear(); 3471 ccus.clear(); 3472 3473 //test atr overflow buffer 3474 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3475 ccus.put((char)0x00); 3476 3477 ccbs.limit(ccbs.position()); 3478 ccbs.position(0); 3479 ccus.limit(ccus.position()); 3480 ccus.position(0); 3481 3482 try { 3483 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3484 errln("Exception while decoding ISCII should have been thrown."); 3485 } 3486 catch (Exception ex) { 3487 } 3488 3489 //end of charset decoder code coverage code 3490 3491 ccbs.clear(); 3492 ccus.clear(); 3493 3494 //start of charset encoder code coverage code 3495 //test ascii overflow buffer 3496 ccus.put((char)0x41); 3497 3498 ccus.limit(ccus.position()); 3499 ccus.position(0); 3500 ccbs.limit(0); 3501 ccbs.position(0); 3502 3503 try { 3504 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3505 errln("Exception while encoding ISCII should have been thrown."); 3506 } 3507 catch (Exception ex) { 3508 } 3509 3510 ccbs.clear(); 3511 ccus.clear(); 3512 3513 //test ascii overflow buffer 3514 ccus.put((char)0x0A); ccus.put((char)0x0043); 3515 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3516 3517 ccus.limit(ccus.position()); 3518 ccus.position(0); 3519 ccbs.limit(ccbs.position()); 3520 ccbs.position(0); 3521 3522 try { 3523 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3524 errln("Exception while encoding ISCII should have been thrown."); 3525 } 3526 catch (Exception ex) { 3527 } 3528 3529 ccbs.clear(); 3530 ccus.clear(); 3531 3532 //test surrogate malform 3533 ccus.put((char)0x06E3); 3534 ccbs.put((byte)0x00); 3535 3536 ccus.limit(ccus.position()); 3537 ccus.position(0); 3538 
ccbs.limit(ccbs.position()); 3539 ccbs.position(0); 3540 3541 try { 3542 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3543 errln("Exception while encoding ISCII should have been thrown."); 3544 } 3545 catch (Exception ex) { 3546 } 3547 3548 ccbs.clear(); 3549 ccus.clear(); 3550 3551 //test surrogate malform 3552 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3553 ccbs.put((byte)0x00); 3554 3555 ccus.limit(ccus.position()); 3556 ccus.position(0); 3557 ccbs.limit(ccbs.position()); 3558 ccbs.position(0); 3559 3560 try { 3561 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3562 errln("Exception while encoding ISCII should have been thrown."); 3563 } 3564 catch (Exception ex) { 3565 } 3566 3567 ccbs.clear(); 3568 ccus.clear(); 3569 3570 //test trail surrogate malform 3571 ccus.put((char)0xDD01); 3572 ccbs.put((byte)0x00); 3573 3574 ccus.limit(ccus.position()); 3575 ccus.position(0); 3576 ccbs.limit(ccbs.position()); 3577 ccbs.position(0); 3578 3579 try { 3580 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3581 errln("Exception while encoding ISCII should have been thrown."); 3582 } 3583 catch (Exception ex) { 3584 } 3585 3586 ccbs.clear(); 3587 ccus.clear(); 3588 3589 //test lead surrogates malform 3590 ccus.put((char)0xD801); ccus.put((char)0xD802); 3591 ccbs.put((byte)0x00); 3592 3593 ccus.limit(ccus.position()); 3594 ccus.position(0); 3595 ccbs.limit(ccbs.position()); 3596 ccbs.position(0); 3597 3598 try { 3599 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3600 errln("Exception while encoding ISCII should have been thrown."); 3601 } 3602 catch (Exception ex) { 3603 } 3604 3605 ccus.clear(); 3606 ccbs.clear(); 3607 3608 //test overflow buffer 3609 ccus.put((char)0x0901); 3610 ccbs.put((byte)0x00); 3611 3612 ccus.limit(ccus.position()); 3613 ccus.position(0); 3614 ccbs.limit(ccbs.position()); 3615 ccbs.position(0); 3616 3617 cs = provider.charsetForName("ISCII,version=0"); 3618 encoder = 
cs.newEncoder(); 3619 3620 try { 3621 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3622 errln("Exception while encoding ISCII should have been thrown."); 3623 } 3624 catch (Exception ex) { 3625 } 3626 //end of charset encoder code coverage code 3627 } 3628 3629 //Test for the IMAP Charset 3630 @Test TestCharsetIMAP()3631 public void TestCharsetIMAP() { 3632 CharsetProvider provider = new CharsetProviderICU(); 3633 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3634 CharsetEncoder encoder = cs.newEncoder(); 3635 CharsetDecoder decoder = cs.newDecoder(); 3636 3637 CharBuffer us = CharBuffer.allocate(0x20); 3638 ByteBuffer bs = ByteBuffer.allocate(0x20); 3639 3640 us.put((char)0x00A3); us.put((char)0x2020); us.put((char)0x41); 3641 3642 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); bs.put((byte)0x67); bs.put((byte)0x49); 3643 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3644 3645 3646 bs.limit(bs.position()); 3647 bs.position(0); 3648 us.limit(us.position()); 3649 us.position(0); 3650 3651 smBufDecode(decoder, "IMAP", bs, us); 3652 smBufEncode(encoder, "IMAP", us, bs); 3653 3654 //the rest of the code in this method is for better code coverage 3655 us.clear(); 3656 bs.clear(); 3657 3658 //start of charset encoder code coverage 3659 //test buffer overflow 3660 us.put((char)0x0026); us.put((char)0x17A9); 3661 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3662 3663 bs.limit(bs.position()); 3664 bs.position(0); 3665 us.limit(us.position()); 3666 us.position(0); 3667 3668 try { 3669 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3670 errln("Exception while encoding IMAP (1) should have been thrown."); 3671 } catch(Exception ex) { 3672 } 3673 3674 us.clear(); 3675 bs.clear(); 3676 3677 //test buffer overflow 3678 us.put((char)0x17A9); us.put((char)0x0941); 3679 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 
bs.put((byte)0x00); 3680 3681 bs.limit(bs.position()); 3682 bs.position(0); 3683 us.limit(us.position()); 3684 us.position(0); 3685 3686 try { 3687 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3688 errln("Exception while encoding IMAP (2) should have been thrown."); 3689 } catch(Exception ex) { 3690 } 3691 3692 us.clear(); 3693 bs.clear(); 3694 3695 //test buffer overflow 3696 us.put((char)0x17A9); us.put((char)0x0941); 3697 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3698 3699 bs.limit(bs.position()); 3700 bs.position(0); 3701 us.limit(us.position()); 3702 us.position(0); 3703 3704 try { 3705 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3706 errln("Exception while encoding IMAP (3) should have been thrown."); 3707 } catch(Exception ex) { 3708 } 3709 3710 us.clear(); 3711 bs.clear(); 3712 3713 //test buffer overflow 3714 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3715 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3716 bs.put((byte)0x00); 3717 3718 bs.limit(bs.position()); 3719 bs.position(0); 3720 us.limit(us.position()); 3721 us.position(0); 3722 3723 try { 3724 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3725 errln("Exception while encoding IMAP (4) should have been thrown."); 3726 } catch(Exception ex) { 3727 } 3728 3729 us.clear(); 3730 bs.clear(); 3731 3732 //test buffer overflow 3733 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3734 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3735 bs.put((byte)0x00); bs.put((byte)0x00); 3736 3737 bs.limit(bs.position()); 3738 bs.position(0); 3739 us.limit(us.position()); 3740 us.position(0); 3741 3742 try { 3743 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3744 errln("Exception while encoding IMAP (5) should have been thrown."); 3745 } catch(Exception ex) 
{ 3746 } 3747 3748 us.clear(); 3749 bs.clear(); 3750 3751 //test buffer overflow 3752 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3753 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3754 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3755 3756 bs.limit(bs.position()); 3757 bs.position(0); 3758 us.limit(us.position()); 3759 us.position(0); 3760 3761 try { 3762 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3763 errln("Exception while encoding IMAP (6) should have been thrown."); 3764 } catch(Exception ex) { 3765 } 3766 3767 us.clear(); 3768 bs.clear(); 3769 3770 //test buffer overflow 3771 us.put((char)0x17A9); us.put((char)0x0941); 3772 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3773 bs.put((byte)0x00); 3774 3775 bs.limit(bs.position()); 3776 bs.position(0); 3777 us.limit(us.position()); 3778 us.position(0); 3779 3780 try { 3781 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3782 errln("Exception while encoding IMAP (7) should have been thrown."); 3783 } catch(Exception ex) { 3784 } 3785 3786 us.clear(); 3787 bs.clear(); 3788 3789 //test flushing 3790 us.put((char)0x17A9); us.put((char)0x0941); 3791 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3792 bs.put((byte)0x51); bs.put((byte)0x2d); 3793 3794 bs.limit(bs.position()); 3795 bs.position(0); 3796 us.limit(us.position()); 3797 us.position(0); 3798 3799 try { 3800 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3801 } catch(Exception ex) { 3802 errln("Exception while encoding IMAP (8) should not have been thrown."); 3803 } 3804 3805 us = CharBuffer.allocate(0x08); 3806 bs = ByteBuffer.allocate(0x08); 3807 3808 //test flushing buffer overflow 3809 us.put((char)0x0061); 3810 bs.put((byte)0x61); bs.put((byte)0x00); 
3811 3812 bs.limit(bs.position()); 3813 bs.position(0); 3814 us.limit(us.position()); 3815 us.position(0); 3816 3817 try { 3818 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3819 } catch(Exception ex) { 3820 errln("Exception while encoding IMAP (9) should not have been thrown."); 3821 } 3822 //end of charset encoder code coverage 3823 3824 us = CharBuffer.allocate(0x10); 3825 bs = ByteBuffer.allocate(0x10); 3826 3827 //start of charset decoder code coverage 3828 //test malform case 2 3829 us.put((char)0x0000); us.put((char)0x0000); 3830 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3831 3832 bs.limit(bs.position()); 3833 bs.position(0); 3834 us.limit(us.position()); 3835 us.position(0); 3836 3837 try { 3838 smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3839 errln("Exception while decoding IMAP (1) should have been thrown."); 3840 } catch(Exception ex) { 3841 } 3842 3843 us.clear(); 3844 bs.clear(); 3845 3846 //test malform case 5 3847 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3848 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3849 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3850 3851 bs.limit(bs.position()); 3852 bs.position(0); 3853 us.limit(us.position()); 3854 us.position(0); 3855 3856 try { 3857 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3858 errln("Exception while decoding IMAP (2) should have been thrown."); 3859 } catch(Exception ex) { 3860 } 3861 3862 us.clear(); 3863 bs.clear(); 3864 3865 //test malform case 7 3866 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3867 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3868 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3869 bs.put((byte)0x41); 3870 3871 bs.limit(bs.position()); 3872 bs.position(0); 3873 us.limit(us.position()); 3874 us.position(0); 3875 3876 try { 3877 
smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3878 errln("Exception while decoding IMAP (3) should have been thrown."); 3879 } catch(Exception ex) { 3880 } 3881 //end of charset decoder coder coverage 3882 } 3883 3884 //Test for charset UTF32LE to provide better code coverage 3885 @Test TestCharsetUTF32LE()3886 public void TestCharsetUTF32LE() { 3887 CoderResult result = CoderResult.UNDERFLOW; 3888 CharsetProvider provider = new CharsetProviderICU(); 3889 Charset cs = provider.charsetForName("UTF-32LE"); 3890 CharsetEncoder encoder = cs.newEncoder(); 3891 //CharsetDecoder decoder = cs.newDecoder(); 3892 3893 CharBuffer us = CharBuffer.allocate(0x10); 3894 ByteBuffer bs = ByteBuffer.allocate(0x10); 3895 3896 3897 //test malform surrogate 3898 us.put((char)0xD901); 3899 bs.put((byte)0x00); 3900 3901 bs.limit(bs.position()); 3902 bs.position(0); 3903 us.limit(us.position()); 3904 us.position(0); 3905 3906 try { 3907 smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false); 3908 errln("Exception while encoding UTF32LE (1) should have been thrown."); 3909 } catch (Exception ex) { 3910 } 3911 3912 bs.clear(); 3913 us.clear(); 3914 3915 //test malform surrogate 3916 us.put((char)0xD901); us.put((char)0xD902); 3917 bs.put((byte)0x00); 3918 3919 bs.limit(bs.position()); 3920 bs.position(0); 3921 us.limit(us.position()); 3922 us.position(0); 3923 3924 result = encoder.encode(us, bs, true); 3925 3926 if (!result.isError() && !result.isOverflow()) { 3927 errln("Error while encoding UTF32LE (2) should have occurred."); 3928 } 3929 3930 bs.clear(); 3931 us.clear(); 3932 3933 //test overflow trail surrogate 3934 us.put((char)0xDD01); us.put((char)0xDD0E); us.put((char)0xDD0E); 3935 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3936 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3937 3938 bs.limit(bs.position()); 3939 bs.position(0); 3940 us.limit(us.position()); 3941 us.position(0); 3942 3943 result = encoder.encode(us, 
bs, true); 3944 3945 if (!result.isError() && !result.isOverflow()) { 3946 errln("Error while encoding UTF32LE (3) should have occurred."); 3947 } 3948 3949 bs.clear(); 3950 us.clear(); 3951 3952 //test malform lead surrogate 3953 us.put((char)0xD90D); us.put((char)0xD90E); 3954 bs.put((byte)0x00); 3955 3956 bs.limit(bs.position()); 3957 bs.position(0); 3958 us.limit(us.position()); 3959 us.position(0); 3960 3961 try { 3962 smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false); 3963 errln("Exception while encoding UTF32LE (4) should have been thrown."); 3964 } catch (Exception ex) { 3965 } 3966 3967 bs.clear(); 3968 us.clear(); 3969 3970 //test overflow buffer 3971 us.put((char)0x0061); 3972 bs.put((byte)0x00); 3973 3974 bs.limit(bs.position()); 3975 bs.position(0); 3976 us.limit(us.position()); 3977 us.position(0); 3978 3979 try { 3980 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 3981 errln("Exception while encoding UTF32LE (5) should have been thrown."); 3982 } catch (Exception ex) { 3983 } 3984 3985 bs.clear(); 3986 us.clear(); 3987 3988 //test malform trail surrogate 3989 us.put((char)0xDD01); 3990 bs.put((byte)0x00); 3991 3992 bs.limit(bs.position()); 3993 bs.position(0); 3994 us.limit(us.position()); 3995 us.position(0); 3996 3997 try { 3998 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 3999 errln("Exception while encoding UTF32LE (6) should have been thrown."); 4000 } catch (Exception ex) { 4001 } 4002 } 4003 4004 //Test for charset UTF16LE to provide better code coverage 4005 @Test TestCharsetUTF16LE()4006 public void TestCharsetUTF16LE() { 4007 CoderResult result = CoderResult.UNDERFLOW; 4008 CharsetProvider provider = new CharsetProviderICU(); 4009 Charset cs = provider.charsetForName("UTF-16LE"); 4010 CharsetEncoder encoder = cs.newEncoder(); 4011 //CharsetDecoder decoder = cs.newDecoder(); 4012 4013 // Test for malform and change fromUChar32 for next call 4014 char u_pts1[] = { 4015 (char)0xD805, 4016 (char)0xDC01, 
(char)0xDC02, (char)0xDC03, 4017 (char)0xD901, (char)0xD902 4018 }; 4019 byte b_pts1[] = { 4020 (byte)0x00, 4021 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 4022 }; 4023 4024 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4025 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4026 4027 us.put(u_pts1); 4028 bs.put(b_pts1); 4029 4030 us.limit(1); 4031 us.position(0); 4032 bs.limit(1); 4033 bs.position(0); 4034 4035 result = encoder.encode(us, bs, true); 4036 4037 if (!result.isMalformed()) { 4038 // LE should not output BOM, so this should be malformed 4039 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4040 } 4041 4042 // Test for malform surrogate from previous buffer 4043 us.limit(4); 4044 us.position(1); 4045 bs.limit(7); 4046 bs.position(1); 4047 4048 result = encoder.encode(us, bs, true); 4049 4050 if (!result.isMalformed()) { 4051 errln("Error while encoding UTF-16LE (2) should have occured."); 4052 } 4053 4054 // Test for malform trail surrogate 4055 encoder.reset(); 4056 4057 us.limit(1); 4058 us.position(0); 4059 bs.limit(1); 4060 bs.position(0); 4061 4062 result = encoder.encode(us, bs, true); 4063 4064 us.limit(6); 4065 us.position(4); 4066 bs.limit(4); 4067 bs.position(1); 4068 4069 result = encoder.encode(us, bs, true); 4070 4071 if (!result.isMalformed()) { 4072 errln("Error while encoding UTF-16LE (3) should have occured."); 4073 } 4074 } 4075 4076 //provide better code coverage for the generic charset UTF32 4077 @Test TestCharsetUTF32()4078 public void TestCharsetUTF32() { 4079 CoderResult result = CoderResult.UNDERFLOW; 4080 CharsetProvider provider = new CharsetProviderICU(); 4081 Charset cs = provider.charsetForName("UTF-32"); 4082 CharsetDecoder decoder = cs.newDecoder(); 4083 CharsetEncoder encoder = cs.newEncoder(); 4084 4085 //start of decoding code coverage 4086 char us_array[] = { 4087 0x0000, 0x0000, 0x0000, 0x0000, 4088 }; 4089 4090 byte bs_array1[] = { 4091 (byte)0x00, (byte)0x00, 
(byte)0xFE, (byte)0xFF, 4092 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4093 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4094 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4095 }; 4096 4097 byte bs_array2[] = { 4098 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4099 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4100 }; 4101 4102 CharBuffer us = CharBuffer.allocate(us_array.length); 4103 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4104 4105 us.put(us_array); 4106 bs.put(bs_array1); 4107 4108 us.limit(us.position()); 4109 us.position(0); 4110 bs.limit(bs.position()); 4111 bs.position(0); 4112 4113 try { 4114 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4115 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4116 } catch (Exception ex) { 4117 } 4118 4119 decoder = cs.newDecoder(); 4120 4121 bs = ByteBuffer.allocate(bs_array2.length); 4122 bs.put(bs_array2); 4123 4124 us.limit(4); 4125 us.position(0); 4126 bs.limit(bs.position()); 4127 bs.position(0); 4128 4129 try { 4130 smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false); 4131 } catch (Exception ex) { 4132 // should recognize little endian BOM 4133 errln("Exception while decoding UTF32 charset (2) should not have been thrown."); 4134 } 4135 4136 //Test malform exception 4137 bs.clear(); 4138 us.clear(); 4139 4140 bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00); 4141 us.put((char)0x0000); 4142 4143 us.limit(us.position()); 4144 us.position(0); 4145 bs.limit(bs.position()); 4146 bs.position(0); 4147 4148 try { 4149 smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false); 4150 errln("Malform exception while decoding UTF32 charset (3) should have been thrown."); 4151 } catch (Exception ex) { 4152 } 4153 4154 //Test BOM testing 4155 bs.clear(); 4156 us.clear(); 4157 4158 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 4159 us.put((char)0x0000); 4160 4161 
us.limit(us.position()); 4162 us.position(0); 4163 bs.limit(bs.position()); 4164 bs.position(0); 4165 4166 try { 4167 smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false); 4168 } catch (Exception ex) { 4169 // should recognize big endian BOM 4170 errln("Exception while decoding UTF32 charset (4) should not have been thrown."); 4171 } 4172 //end of decoding code coverage 4173 4174 //start of encoding code coverage 4175 us = CharBuffer.allocate(0x10); 4176 bs = ByteBuffer.allocate(0x10); 4177 4178 //test wite BOM overflow error 4179 us.put((char)0xDC01); 4180 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4181 4182 us.limit(us.position()); 4183 us.position(0); 4184 bs.limit(bs.position()); 4185 bs.position(0); 4186 4187 result = encoder.encode(us, bs, true); 4188 // must try to output BOM first for UTF-32 (not UTF-32BE or UTF-32LE) 4189 if (!result.isOverflow()) { 4190 errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred."); 4191 } 4192 4193 us.clear(); 4194 bs.clear(); 4195 4196 //test malform surrogate and store value in fromChar32 4197 us.put((char)0xD801); us.put((char)0xD802); 4198 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4199 4200 us.limit(us.position()); 4201 us.position(0); 4202 bs.limit(bs.position()); 4203 bs.position(0); 4204 4205 result = encoder.encode(us, bs, true); 4206 if (!result.isMalformed()) { 4207 errln("Malformed error while encoding UTF32 charset (2) should have occurred."); 4208 } 4209 4210 us.clear(); 4211 bs.clear(); 4212 4213 //test malform surrogate 4214 us.put((char)0x0000); us.put((char)0xD902); 4215 4216 us.limit(us.position()); 4217 us.position(0); 4218 bs.limit(bs.position()); 4219 bs.position(0); 4220 4221 result = encoder.encode(us, bs, true); 4222 if (!result.isOverflow()) { 4223 errln("Overflow error while encoding UTF32 charset (3) should have occurred."); 4224 } 4225 4226 us.clear(); 4227 bs.clear(); 4228 
4229 //test malform surrogate 4230 encoder.reset(); 4231 us.put((char)0xD801); 4232 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4233 4234 us.limit(us.position()); 4235 us.position(0); 4236 bs.limit(bs.position()); 4237 bs.position(0); 4238 4239 result = encoder.encode(us, bs, true); 4240 if (!result.isMalformed()) { 4241 errln("Malform error while encoding UTF32 charset (4) should have occurred."); 4242 } 4243 4244 us.clear(); 4245 bs.clear(); 4246 4247 //test overflow surrogate 4248 us.put((char)0x0000); us.put((char)0xDDE1); us.put((char)0xD915); us.put((char)0xDDF2); 4249 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4250 4251 us.limit(us.position()); 4252 us.position(0); 4253 bs.limit(bs.position()); 4254 bs.position(0); 4255 4256 result = encoder.encode(us, bs, true); 4257 if (!result.isOverflow()) { 4258 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4259 } 4260 4261 us.clear(); 4262 bs.clear(); 4263 4264 //test malform surrogate 4265 encoder.reset(); 4266 us.put((char)0xDDE1); 4267 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4268 4269 us.limit(us.position()); 4270 us.position(0); 4271 bs.limit(bs.position()); 4272 bs.position(0); 4273 4274 result = encoder.encode(us, bs, true); 4275 if (!result.isMalformed()) { 4276 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4277 } 4278 //end of encoding code coverage 4279 } 4280 4281 //this method provides better code coverage decoding UTF32 LE/BE 4282 @Test TestDecodeUTF32LEBE()4283 public void TestDecodeUTF32LEBE() { 4284 CoderResult result = CoderResult.UNDERFLOW; 4285 CharsetProvider provider = new CharsetProviderICU(); 4286 CharsetDecoder decoder; 4287 CharBuffer us = CharBuffer.allocate(0x10); 4288 ByteBuffer bs = ByteBuffer.allocate(0x10); 4289 4290 //decode UTF32LE 
4291 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4292 //test overflow buffer 4293 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4294 us.put((char)0x0000); 4295 4296 us.limit(us.position()); 4297 us.position(0); 4298 bs.limit(bs.position()); 4299 bs.position(0); 4300 4301 try { 4302 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4303 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4304 } catch (Exception ex) { 4305 } 4306 // test overflow buffer handling in CharsetDecoderICU 4307 bs.position(0); 4308 us.position(0); 4309 decoder.reset(); 4310 result = decoder.decode(bs, us, true); 4311 if (result.isOverflow()) { 4312 result = decoder.decode(bs, us, true); 4313 if (!result.isOverflow()) { 4314 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4315 } 4316 } else { 4317 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4318 } 4319 4320 us.clear(); 4321 bs.clear(); 4322 //test malform buffer 4323 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4324 us.put((char)0x0000); 4325 4326 us.limit(us.position()); 4327 us.position(0); 4328 bs.limit(bs.position()); 4329 bs.position(0); 4330 4331 try { 4332 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4333 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4334 } catch (Exception ex) { 4335 } 4336 4337 us.clear(); 4338 bs.clear(); 4339 //test malform buffer 4340 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4341 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4342 us.put((char)0x0000); 4343 4344 us.limit(us.position()); 4345 us.position(0); 4346 bs.limit(bs.position()); 4347 bs.position(0); 4348 4349 try { 4350 // must flush in order to exhibit malformed behavior 4351 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4352 errln("Malform exception while decoding UTF32LE 
(3) should have been thrown."); 4353 } catch (Exception ex) { 4354 } 4355 4356 us.clear(); 4357 bs.clear(); 4358 //test malform buffer 4359 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4360 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4361 us.put((char)0x0000); 4362 4363 us.limit(us.position()); 4364 us.position(0); 4365 bs.limit(bs.position()); 4366 bs.position(0); 4367 4368 try { 4369 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4370 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4371 } catch (Exception ex) { 4372 } 4373 4374 us.clear(); 4375 bs.clear(); 4376 //test overflow buffer 4377 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4378 bs.put((byte)0xDD); bs.put((byte)0xFF); bs.put((byte)0x10); bs.put((byte)0x00); 4379 us.put((char)0x0000); 4380 4381 us.limit(us.position()); 4382 us.position(0); 4383 bs.limit(bs.position()); 4384 bs.position(0); 4385 4386 try { 4387 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4388 errln("Overflow exception while decoding UTF32LE (5) should have been thrown."); 4389 } catch (Exception ex) { 4390 } 4391 //end of decode UTF32LE 4392 4393 bs.clear(); 4394 us.clear(); 4395 4396 //decode UTF32BE 4397 decoder = provider.charsetForName("UTF-32BE").newDecoder(); 4398 //test overflow buffer 4399 bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41); 4400 us.put((char)0x0000); 4401 4402 us.limit(us.position()); 4403 us.position(0); 4404 bs.limit(bs.position()); 4405 bs.position(0); 4406 4407 try { 4408 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4409 errln("Overflow exception while decoding UTF32BE (1) should have been thrown."); 4410 } catch (Exception ex) { 4411 } 4412 4413 bs.clear(); 4414 us.clear(); 4415 //test malform buffer 4416 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02); 4417 us.put((char)0x0000); 4418 
4419 us.limit(us.position()); 4420 us.position(0); 4421 bs.limit(bs.position()); 4422 bs.position(0); 4423 4424 try { 4425 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4426 errln("Malform exception while decoding UTF32BE (2) should have been thrown."); 4427 } catch (Exception ex) { 4428 } 4429 4430 bs.clear(); 4431 us.clear(); 4432 //test malform buffer 4433 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4434 bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF); 4435 us.put((char)0x0000); 4436 4437 us.limit(us.position()); 4438 us.position(0); 4439 bs.limit(bs.position()); 4440 bs.position(0); 4441 4442 try { 4443 // must flush to exhibit malformed behavior 4444 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4445 errln("Malform exception while decoding UTF32BE (3) should have been thrown."); 4446 } catch (Exception ex) { 4447 } 4448 4449 bs.clear(); 4450 us.clear(); 4451 //test overflow buffer 4452 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4453 bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD); 4454 us.put((char)0x0000); 4455 4456 us.limit(us.position()); 4457 us.position(0); 4458 bs.limit(bs.position()); 4459 bs.position(0); 4460 4461 try { 4462 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4463 errln("Overflow exception while decoding UTF32BE (4) should have been thrown."); 4464 } catch (Exception ex) { 4465 } 4466 4467 bs.clear(); 4468 us.clear(); 4469 //test malform buffer 4470 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); 4471 us.put((char)0x0000); 4472 4473 us.limit(us.position()); 4474 us.position(0); 4475 bs.limit(bs.position()); 4476 bs.position(0); 4477 4478 try { 4479 // must flush to exhibit malformed behavior 4480 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4481 errln("Malform exception while decoding UTF32BE (5) should have been thrown."); 4482 } catch (Exception ex) { 4483 } 4484 //end of decode 
UTF32BE 4485 } 4486 4487 //provide better code coverage for UTF8 4488 @Test TestCharsetUTF8()4489 public void TestCharsetUTF8() { 4490 CoderResult result = CoderResult.UNDERFLOW; 4491 CharsetProvider provider = new CharsetProviderICU(); 4492 CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder(); 4493 CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder(); 4494 4495 CharBuffer us = CharBuffer.allocate(0x10); 4496 ByteBuffer bs = ByteBuffer.allocate(0x10); 4497 ByteBuffer bs2; 4498 CharBuffer us2; 4499 int limit_us; 4500 int limit_bs; 4501 4502 //encode and decode using read only buffer 4503 encoder.reset(); 4504 decoder.reset(); 4505 us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902); 4506 bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5); 4507 bs.put((byte)0x00); 4508 limit_us = us.position(); 4509 limit_bs = bs.position(); 4510 4511 us.limit(limit_us); 4512 us.position(0); 4513 bs.limit(limit_bs); 4514 bs.position(0); 4515 bs2 = bs.asReadOnlyBuffer(); 4516 us2 = us.asReadOnlyBuffer(); 4517 4518 result = decoder.decode(bs2, us, true); 4519 if (!result.isUnderflow() || !equals(us, us2)) { 4520 errln("Error while decoding UTF-8 (1) should not have occured."); 4521 } 4522 4523 us2.limit(limit_us); 4524 us2.position(0); 4525 bs.limit(limit_bs); 4526 bs.position(0); 4527 4528 result = encoder.encode(us2, bs, true); 4529 if (!result.isUnderflow() || !equals(bs, bs2)) { 4530 errln("Error while encoding UTF-8 (1) should not have occured."); 4531 } 4532 4533 us.clear(); 4534 bs.clear(); 4535 4536 //test overflow buffer while encoding 4537 //readonly buffer 4538 encoder.reset(); 4539 us.put((char)0x0081); us.put((char)0xEF65); 4540 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4541 limit_us = us.position(); 4542 us2 = us.asReadOnlyBuffer(); 4543 us2.limit(limit_us); 4544 us2.position(0); 4545 bs.limit(1); 4546 bs.position(0); 
4547 result = encoder.encode(us2, bs, true); 4548 if (!result.isOverflow()) { 4549 errln("Overflow Error should have occured while encoding UTF-8 (2)."); 4550 } 4551 4552 encoder.reset(); 4553 4554 us2.limit(limit_us); 4555 us2.position(1); 4556 bs.limit(1); 4557 bs.position(0); 4558 result = encoder.encode(us2, bs, true); 4559 if (!result.isOverflow()) { 4560 errln("Overflow Error should have occured while encoding UTF-8 (3)."); 4561 } 4562 4563 encoder.reset(); 4564 4565 us2.limit(limit_us); 4566 us2.position(1); 4567 bs.limit(2); 4568 bs.position(0); 4569 result = encoder.encode(us2, bs, true); 4570 if (!result.isOverflow()) { 4571 errln("Overflow Error should have occured while encoding UTF-8 (4)."); 4572 } 4573 4574 encoder.reset(); 4575 4576 us2.limit(limit_us); 4577 us2.position(0); 4578 bs.limit(2); 4579 bs.position(0); 4580 result = encoder.encode(us2, bs, true); 4581 if (!result.isOverflow()) { 4582 errln("Overflow Error should have occured while encoding UTF-8 (5)."); 4583 } 4584 4585 //not readonly buffer 4586 encoder.reset(); 4587 4588 us.limit(limit_us); 4589 us.position(0); 4590 bs.limit(1); 4591 bs.position(0); 4592 result = encoder.encode(us, bs, true); 4593 if (!result.isOverflow()) { 4594 errln("Overflow Error should have occured while encoding UTF-8 (6)."); 4595 } 4596 4597 encoder.reset(); 4598 4599 us.limit(limit_us); 4600 us.position(0); 4601 bs.limit(3); 4602 bs.position(0); 4603 result = encoder.encode(us, bs, true); 4604 if (!result.isOverflow()) { 4605 errln("Overflow Error should have occured while encoding UTF-8 (7)."); 4606 } 4607 4608 encoder.reset(); 4609 4610 us.limit(limit_us); 4611 us.position(1); 4612 bs.limit(2); 4613 bs.position(0); 4614 result = encoder.encode(us, bs, true); 4615 if (!result.isOverflow()) { 4616 errln("Overflow Error should have occured while encoding UTF-8 (8)."); 4617 } 4618 4619 encoder.reset(); 4620 4621 us.limit(limit_us + 1); 4622 us.position(1); 4623 bs.limit(3); 4624 bs.position(0); 4625 result = 
encoder.encode(us, bs, true); 4626 if (!result.isOverflow()) { 4627 errln("Overflow Error should have occured while encoding UTF-8 (9)."); 4628 } 4629 4630 us.clear(); 4631 bs.clear(); 4632 4633 //test encoding 4 byte characters 4634 encoder.reset(); 4635 us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041); 4636 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4637 limit_us = us.position(); 4638 us2 = us.asReadOnlyBuffer(); 4639 us2.limit(limit_us); 4640 us2.position(0); 4641 bs.limit(1); 4642 bs.position(0); 4643 result = encoder.encode(us2, bs, true); 4644 if (!result.isOverflow()) { 4645 errln("Overflow Error should have occured while encoding UTF-8 (10)."); 4646 } 4647 4648 encoder.reset(); 4649 4650 us2.limit(limit_us); 4651 us2.position(0); 4652 bs.limit(2); 4653 bs.position(0); 4654 result = encoder.encode(us2, bs, true); 4655 if (!result.isOverflow()) { 4656 errln("Overflow Error should have occured while encoding UTF-8 (11)."); 4657 } 4658 4659 encoder.reset(); 4660 4661 us2.limit(limit_us); 4662 us2.position(0); 4663 bs.limit(3); 4664 bs.position(0); 4665 result = encoder.encode(us2, bs, true); 4666 if (!result.isOverflow()) { 4667 errln("Overflow Error should have occured while encoding UTF-8 (12)."); 4668 } 4669 4670 encoder.reset(); 4671 4672 us2.limit(limit_us); 4673 us2.position(0); 4674 bs.limit(4); 4675 bs.position(0); 4676 result = encoder.encode(us2, bs, true); 4677 if (!result.isOverflow()) { 4678 errln("Overflow Error should have occured while encoding UTF-8 (13)."); 4679 } 4680 4681 us.clear(); 4682 bs.clear(); 4683 4684 //decoding code coverage 4685 //test malform error 4686 decoder.reset(); 4687 bs.put((byte)0xC2); bs.put((byte)0xC2); 4688 us.put((char)0x0000); 4689 bs2 = bs.asReadOnlyBuffer(); 4690 4691 us.limit(1); 4692 us.position(0); 4693 bs2.limit(1); 4694 bs2.position(0); 4695 4696 result = decoder.decode(bs2, us, true); 4697 result = decoder.flush(us); 4698 if (!result.isMalformed()) { 4699 
errln("Malform error should have occurred while decoding UTF-8 (1)."); 4700 } 4701 4702 us.limit(1); 4703 us.position(0); 4704 bs2.limit(1); 4705 bs2.position(0); 4706 4707 decoder.reset(); 4708 4709 result = decoder.decode(bs2, us, true); 4710 us.limit(1); 4711 us.position(0); 4712 bs2.limit(2); 4713 bs2.position(0); 4714 result = decoder.decode(bs2, us, true); 4715 if (!result.isMalformed()) { 4716 errln("Malform error should have occurred while decoding UTF-8 (2)."); 4717 } 4718 4719 us.clear(); 4720 bs.clear(); 4721 4722 //test overflow buffer 4723 bs.put((byte)0x01); bs.put((byte)0x41); 4724 us.put((char)0x0000); 4725 bs2 = bs.asReadOnlyBuffer(); 4726 us.limit(1); 4727 us.position(0); 4728 bs2.limit(2); 4729 bs2.position(0); 4730 4731 result = decoder.decode(bs2, us, true); 4732 if (!result.isOverflow()) { 4733 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4734 } 4735 4736 us.clear(); 4737 bs.clear(); 4738 4739 //test malform string 4740 decoder.reset(); 4741 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4742 us.put((char)0x0000); 4743 bs2 = bs.asReadOnlyBuffer(); 4744 us.limit(1); 4745 us.position(0); 4746 bs2.limit(4); 4747 bs2.position(0); 4748 4749 result = decoder.decode(bs2, us, true); 4750 if (!result.isMalformed()) { 4751 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4752 } 4753 4754 bs.clear(); 4755 4756 //test overflow 4757 decoder.reset(); 4758 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4759 bs2 = bs.asReadOnlyBuffer(); 4760 us.limit(1); 4761 us.position(0); 4762 bs2.limit(4); 4763 bs2.position(0); 4764 4765 result = decoder.decode(bs2, us, true); 4766 if (!result.isOverflow()) { 4767 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4768 } 4769 4770 //test overflow 4771 decoder.reset(); 4772 us.limit(2); 4773 us.position(0); 4774 bs2.limit(5); 4775 bs2.position(0); 4776 4777 result = decoder.decode(bs2, us, 
true); 4778 if (!result.isOverflow()) { 4779 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4780 } 4781 4782 //test overflow 4783 decoder.reset(); 4784 us.limit(1); 4785 us.position(0); 4786 bs.limit(5); 4787 bs.position(0); 4788 4789 result = decoder.decode(bs, us, true); 4790 if (!result.isOverflow()) { 4791 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4792 } 4793 4794 bs.clear(); 4795 4796 //test overflow 4797 decoder.reset(); 4798 bs.put((byte)0x41); bs.put((byte)0x42); 4799 us.limit(1); 4800 us.position(0); 4801 bs.limit(2); 4802 bs.position(0); 4803 4804 result = decoder.decode(bs, us, true); 4805 if (!result.isOverflow()) { 4806 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4807 } 4808 4809 } 4810 4811 //provide better code coverage for Charset UTF16 4812 @Test TestCharsetUTF16()4813 public void TestCharsetUTF16() { 4814 CoderResult result = CoderResult.UNDERFLOW; 4815 CharsetProvider provider = new CharsetProviderICU(); 4816 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4817 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4818 4819 CharBuffer us = CharBuffer.allocate(0x10); 4820 ByteBuffer bs = ByteBuffer.allocate(0x10); 4821 4822 //test flush buffer and malform string 4823 bs.put((byte)0xFF); 4824 us.put((char)0x0000); 4825 4826 us.limit(us.position()); 4827 us.position(0); 4828 bs.limit(bs.position()); 4829 bs.position(0); 4830 4831 result = decoder.decode(bs, us, true); 4832 result = decoder.flush(us); 4833 if (!result.isMalformed()) { 4834 errln("Malform error while decoding UTF-16 should have occurred."); 4835 } 4836 4837 us.clear(); 4838 bs.clear(); 4839 4840 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4841 4842 us.limit(1); 4843 us.position(0); 4844 bs.limit(4); 4845 bs.position(0); 4846 4847 result = encoder.encode(us, bs, true); 4848 us.limit(3); 4849 us.position(0); 4850 bs.limit(3); 4851 
bs.position(0); 4852 result = encoder.encode(us, bs, true); 4853 if (!result.isOverflow()) { 4854 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4855 } 4856 4857 us.clear(); 4858 bs.clear(); 4859 4860 //test overflow buffer 4861 decoder.reset(); 4862 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4863 4864 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4865 4866 us.limit(0); 4867 us.position(0); 4868 bs.limit(3); 4869 bs.position(0); 4870 4871 result = decoder.decode(bs, us, true); 4872 if (!result.isOverflow()) { 4873 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4874 } 4875 } 4876 4877 //provide better code coverage for Charset ISO-2022-KR 4878 @Test TestCharsetISO2022KR()4879 public void TestCharsetISO2022KR() { 4880 CoderResult result = CoderResult.UNDERFLOW; 4881 CharsetProvider provider = new CharsetProviderICU(); 4882 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4883 4884 byte bytearray[] = { 4885 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4886 }; 4887 char chararray[] = { 4888 (char)0x0041 4889 }; 4890 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4891 CharBuffer cb = CharBuffer.wrap(chararray); 4892 4893 result = decoder.decode(bb, cb, true); 4894 4895 if (!result.isOverflow()) { 4896 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4897 } 4898 } 4899 4900 //provide better code coverage for Charset ISO-2022-JP 4901 @Test TestCharsetISO2022JP()4902 public void TestCharsetISO2022JP() { 4903 CoderResult result = CoderResult.UNDERFLOW; 4904 CharsetProvider provider = new CharsetProviderICU(); 4905 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4906 4907 byte bytearray[] = { 4908 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4909 }; 4910 char chararray[] = { 4911 (char)0x000A 4912 }; 4913 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4914 CharBuffer 
cb = CharBuffer.wrap(chararray); 4915 4916 result = decoder.decode(bb, cb, true); 4917 4918 if (!result.isOverflow()) { 4919 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4920 } 4921 } 4922 4923 //provide better code coverage for Charset ASCII 4924 @Test TestCharsetASCII()4925 public void TestCharsetASCII() { 4926 CoderResult result = CoderResult.UNDERFLOW; 4927 CharsetProvider provider = new CharsetProviderICU(); 4928 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4929 4930 byte bytearray[] = { 4931 (byte)0x41 4932 }; 4933 char chararray[] = { 4934 (char)0x0041 4935 }; 4936 4937 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4938 CharBuffer cb = CharBuffer.wrap(chararray); 4939 4940 result = decoder.decode(bb, cb, true); 4941 result = decoder.flush(cb); 4942 4943 if (result.isError()) { 4944 errln("Error occurred while decoding US-ASCII."); 4945 } 4946 } 4947 4948 // provide better code coverage for Charset Callbacks 4949 /* Different aspects of callbacks are being tested including using different context available */ 4950 @Test TestCharsetCallbacks()4951 public void TestCharsetCallbacks() { 4952 CoderResult result = CoderResult.UNDERFLOW; 4953 CharsetProvider provider = new CharsetProviderICU(); 4954 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4955 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4956 4957 String context3[] = { 4958 "i", 4959 "J" 4960 }; 4961 4962 // Testing encoder escape callback 4963 String context1[] = { 4964 "J", 4965 "C", 4966 "D", 4967 null 4968 }; 4969 char chararray[] = { 4970 (char)0xd122 4971 }; 4972 ByteBuffer bb = ByteBuffer.allocate(20); 4973 CharBuffer cb = CharBuffer.wrap(chararray); 4974 4975 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 
4976 for (int i = 0; i < context1.length; i++) { 4977 encoder.reset(); 4978 cb.position(0); 4979 bb.position(0); 4980 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 4981 4982 result = encoder.encode(cb, bb, true); 4983 if (result.isError()) { 4984 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4985 } 4986 } 4987 4988 // Testing encoder skip callback 4989 for (int i = 0; i < context3.length; i++) { 4990 encoder.reset(); 4991 cb.position(0); 4992 bb.position(0); 4993 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 4994 4995 result = encoder.encode(cb, bb, true); 4996 if (result.isError() && i == 0) { 4997 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4998 } 4999 } 5000 5001 // Testing encoder sub callback 5002 for (int i = 0; i < context3.length; i++) { 5003 encoder.reset(); 5004 cb.position(0); 5005 bb.position(0); 5006 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 5007 5008 result = encoder.encode(cb, bb, true); 5009 if (result.isError() && i == 0) { 5010 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5011 } 5012 } 5013 5014 // Testing decoder escape callback 5015 String context2[] = { 5016 "X", 5017 "C", 5018 "D", 5019 null 5020 }; 5021 byte bytearray[] = { 5022 (byte)0x1b, (byte)0x2e, (byte)0x43 5023 }; 5024 bb = ByteBuffer.wrap(bytearray); 5025 cb = CharBuffer.allocate(20); 5026 5027 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 
5028 for (int i = 0; i < context2.length; i++) { 5029 decoder.reset(); 5030 cb.position(0); 5031 bb.position(0); 5032 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 5033 5034 result = decoder.decode(bb, cb, true); 5035 if (result.isError()) { 5036 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5037 } 5038 } 5039 5040 // Testing decoder skip callback 5041 for (int i = 0; i < context3.length; i++) { 5042 decoder.reset(); 5043 cb.position(0); 5044 bb.position(0); 5045 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5046 result = decoder.decode(bb, cb, true); 5047 if (!result.isError()) { 5048 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5049 } 5050 } 5051 } 5052 5053 // Testing invalid input exceptions 5054 @Test TestInvalidInput()5055 public void TestInvalidInput() { 5056 CharsetProvider provider = new CharsetProviderICU(); 5057 Charset charset = provider.charsetForName("iso-2022-jp"); 5058 CharsetEncoder encoder = charset.newEncoder(); 5059 CharsetDecoder decoder = charset.newDecoder(); 5060 5061 try { 5062 encoder.encode(CharBuffer.allocate(10), null, true); 5063 errln("Illegal argument exception should have been thrown due to null target."); 5064 } catch (Exception ex) { 5065 } 5066 5067 try { 5068 decoder.decode(ByteBuffer.allocate(10), null, true); 5069 errln("Illegal argument exception should have been thrown due to null target."); 5070 } catch (Exception ex) { 5071 } 5072 } 5073 5074 // Test java canonical names 5075 @Test TestGetICUJavaCanonicalNames()5076 public void TestGetICUJavaCanonicalNames() { 5077 // Ambiguous charset name. 
5078 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5079 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5080 if (javaCName == null || icuCName == null) { 5081 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5082 } 5083 5084 } 5085 5086 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5087 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 5088 @Test TestCharsetTestData()5089 public void TestCharsetTestData() { 5090 CoderResult result = CoderResult.UNDERFLOW; 5091 String charsetName = "test4"; 5092 CharsetProvider provider = new CharsetProviderICU(); 5093 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5094 this.getClass().getClassLoader()); 5095 CharsetEncoder encoder = charset.newEncoder(); 5096 CharsetDecoder decoder = charset.newDecoder(); 5097 5098 byte bytearray[] = { 5099 0x01, 0x02, 0x03, 0x0a, 5100 0x01, 0x02, 0x03, 0x0b, 5101 0x01, 0x02, 0x03, 0x0d, 5102 }; 5103 5104 // set the callback for overflow errors 5105 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5106 5107 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5108 CharBuffer cb = CharBuffer.allocate(10); 5109 5110 bb.limit(4); 5111 cb.limit(1); // Overflow should occur and is expected 5112 result = decoder.decode(bb, cb, false); 5113 if (result.isError()) { 5114 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5115 } 5116 5117 bb.limit(8); 5118 result = decoder.decode(bb, cb, false); 5119 if (result.isError()) { 5120 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5121 } 5122 5123 bb.limit(12); 5124 result = decoder.decode(bb, cb, true); 5125 if (result.isError()) { 5126 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5127 } 5128 5129 char 
chararray[] = { 5130 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5131 0xD940, /* first half of \U00060006 or \U00060007 */ 5132 0xDC07/* second half of \U00060007 */ 5133 }; 5134 5135 cb = CharBuffer.wrap(chararray); 5136 bb = ByteBuffer.allocate(10); 5137 5138 bb.limit(2); 5139 cb.limit(4); 5140 result = encoder.encode(cb, bb, false); 5141 if (result.isError()) { 5142 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5143 } 5144 cb.limit(5); 5145 result = encoder.encode(cb, bb, false); 5146 if (result.isError()) { 5147 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5148 } 5149 cb.limit(6); 5150 result = encoder.encode(cb, bb, true); 5151 if (!result.isError()) { 5152 errln("Error should have occurred while encoding: " + charsetName); 5153 } 5154 } 5155 5156 /* Round trip test of SCSU converter*/ 5157 @Test TestSCSUConverter()5158 public void TestSCSUConverter(){ 5159 byte allFeaturesSCSU[]={ 5160 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5161 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5162 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5163 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5164 }; 5165 5166 char allFeaturesUTF16[]={ 5167 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5168 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5169 0x01df, 0xf000, 0xdbff, 0xdfff 5170 }; 5171 5172 5173 char germanUTF16[]={ 5174 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5175 }; 5176 5177 byte germanSCSU[]={ 5178 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5179 }; 5180 5181 char russianUTF16[]={ 5182 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5183 }; 5184 5185 byte russianSCSU[]={ 5186 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5187 }; 5188 5189 char 
japaneseUTF16[]={ 5190 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5191 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5192 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5193 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5194 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5195 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5196 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 5197 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 5198 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5199 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5200 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5201 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5202 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5203 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5204 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5205 }; 5206 5207 // SCSUEncoder produces a slightly longer result (179B vs. 
178B) because of one different choice: 5208 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5209 byte japaneseSCSU[]={ 5210 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5211 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5212 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5213 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5214 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5215 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5216 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5217 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 0xaa, 5218 (byte)0x8f, 0x0e, 0x61, 0x1b, (byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5219 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5220 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5221 (byte)0xcb, (byte)0x82 5222 }; 5223 5224 CharsetProviderICU cs = new CharsetProviderICU(); 5225 CharsetICU charset = 
(CharsetICU)cs.charsetForName("scsu"); 5226 CharsetDecoder decode = charset.newDecoder(); 5227 CharsetEncoder encode = charset.newEncoder(); 5228 5229 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5230 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5231 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5232 5233 for(int i=0;i<4;i++){ 5234 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5235 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5236 5237 try{ 5238 // Decoding 5239 CharBuffer decoderResult = decode.decode(decoderBuffer); 5240 encoderBuffer.position(0); 5241 if(!decoderResult.equals(encoderBuffer)){ 5242 errln("Error occured while decoding "+ charset.name()); 5243 } 5244 // Encoding 5245 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5246 // RoundTrip Test 5247 ByteBuffer roundTrip = encoderResult; 5248 CharBuffer roundTripResult = decode.decode(roundTrip); 5249 encoderBuffer.position(0); 5250 if(!roundTripResult.equals(encoderBuffer)){ 5251 errln("Error occured while encoding "+ charset.name()); 5252 } 5253 // Test overflow for code coverage reasons 5254 if (i == 0) { 5255 ByteBuffer test = encoderResult; 5256 test.position(0); 5257 CharBuffer smallBuffer = CharBuffer.allocate(11); 5258 decode.reset(); 5259 CoderResult status = decode.decode(test, smallBuffer, true); 5260 if (status != CoderResult.OVERFLOW) { 5261 errln("Overflow buffer error should have been thrown."); 5262 } 5263 } 5264 }catch(Exception e){ 5265 errln("Exception while converting SCSU thrown: " + e); 5266 } 5267 } 5268 5269 /* Provide better code coverage */ 5270 /* testing illegal codepoints */ 5271 CoderResult illegalResult = CoderResult.UNDERFLOW; 5272 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5273 5274 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5275 decode.reset(); 5276 illegalResult = 
decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5277 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5278 errln("Malformed error should have been returned for decoder " + charset.name()); 5279 } 5280 /* code coverage test from nucnvtst.c in ICU4C */ 5281 CoderResult ccResult = CoderResult.UNDERFLOW; 5282 int CCBufSize = 120 * 10; 5283 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5284 CharBuffer test = CharBuffer.allocate(CCBufSize); 5285 String [] ccSrc = { 5286 "\ud800\udc00", /* smallest surrogate*/ 5287 "\ud8ff\udcff", 5288 "\udBff\udFff", /* largest surrogate pair*/ 5289 "\ud834\udc00", 5290 //"\U0010FFFF", 5291 "Hello \u9292 \u9192 World!", 5292 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5293 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5294 5295 "\u0648\u06c8", /* catch missing reset*/ 5296 "\u0648\u06c8", 5297 5298 "\u4444\uE001", /* lowest quotable*/ 5299 "\u4444\uf2FF", /* highest quotable*/ 5300 "\u4444\uf188\u4444", 5301 "\u4444\uf188\uf288", 5302 "\u4444\uf188abc\u0429\uf288", 5303 "\u9292\u2222", 5304 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5305 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5306 "Hello World!123456", 5307 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5308 5309 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5310 "abc\u4411d", /* uses SQU*/ 5311 "abc\u4411\u4412d",/* uses SCU*/ 5312 "abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5313 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5314 "\u9292\u2222", 5315 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5316 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5317 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5318 5319 "", /* empty input*/ 5320 "\u0000", /* smallest BMP character*/ 5321 "\uFFFF", /* largest BMP character*/ 5322 5323 /* regression tests*/ 5324 
"\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5325 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5326 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5327 "\u0041\u00df\u0401\u015f", 5328 "\u9066\u2123abc", 5329 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5330 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5331 }; 5332 for (int i = 0; i < ccSrc.length; i++) { 5333 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5334 encode.reset(); 5335 decode.reset(); 5336 trgt.clear(); 5337 test.clear(); 5338 ccResult = encode.encode(ubuf, trgt, true); 5339 if (ccResult.isError()) { 5340 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5341 } else { 5342 trgt.limit(trgt.position()); 5343 trgt.position(0); 5344 ccResult = decode.decode(trgt, test, true); 5345 if (ccResult.isError()) { 5346 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5347 } else { 5348 ubuf.position(0); 5349 test.limit(test.position()); 5350 test.position(0); 5351 if (!equals(test, ubuf)) { 5352 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5353 } 5354 } 5355 } 5356 } 5357 5358 /* Monkey test */ 5359 { 5360 char[] monkeyIn = { 5361 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5362 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5363 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 
0x000D, 0x000A, 5364 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5365 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5366 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5367 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5368 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5369 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5370 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5371 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5372 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5373 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5374 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5375 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5376 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5377 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5378 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5379 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5380 /* test non-BMP code points */ 5381 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5382 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5383 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5384 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5385 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5386 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5387 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 
0xDEC8, 5388 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5389 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5390 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5391 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5392 5393 5394 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5395 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5396 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5397 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5398 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5399 }; 5400 encode.reset(); 5401 decode.reset(); 5402 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5403 try { 5404 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5405 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5406 5407 } catch (Exception ex) { 5408 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5409 } 5410 } 5411 // Test malformed 5412 { 5413 char[] malformedSequence = { 5414 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5415 }; 5416 encode.reset(); 5417 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5418 5419 try { 5420 encode.encode(malformedSrc); 5421 errln("Malformed error should have thrown an exception."); 5422 } catch (Exception ex) { 5423 } 5424 } 5425 // Test overflow buffer 5426 { 5427 ByteBuffer overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5428 int sizes[] = { 8, 2, 11 }; 5429 for (int i = 0; i < sizes.length; i++) { 5430 try { 5431 decode.reset(); 5432 overflowTest.position(0); 5433 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5434 errln("Buffer overflow exception should have been thrown."); 5435 } catch (BufferOverflowException ex) { 5436 } catch (Exception ex) { 5437 errln("Buffer overflow exception 
should have been thrown."); 5438 } 5439 } 5440 5441 } 5442 } 5443 5444 /* Test for BOCU1 converter*/ 5445 @Test TestBOCU1Converter()5446 public void TestBOCU1Converter(){ 5447 char expected[]={ 5448 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5449 0x0063, 0x0061, 0x000D, 0x000A, 5450 5451 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5452 0x0930, 0x0020, 0x0918, 0x0909, 5453 5454 0x3086, 0x304D, 0x0020, 0x3053, // 16 5455 0x4000, 0x4E00, 0x7777, 0x0020, 5456 5457 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5458 0x0020, 0xD7A3, 0xDC00, 0xD800, 5459 5460 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5461 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5462 5463 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5464 0x0009 5465 }; 5466 5467 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5468 (byte) 0xFB, 5469 (byte) 0xEE, 5470 0x28, // from source offset 0 5471 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5472 (byte) 0xB3, 5473 (byte) 0xB1, 5474 0x0D, 5475 0x0A, 5476 5477 0x20, // from 8 5478 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5479 0x20, 0x68, 5480 0x59, 5481 5482 (byte) 0xF9, 5483 0x28, // from 16 5484 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5485 (byte) 0xD0, 0x33, 0x20, 5486 5487 (byte) 0xFA, 5488 (byte) 0x83, // from 24 5489 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5490 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5491 5492 (byte) 0xF9, 5493 (byte) 0xA2, // from 32 5494 (byte) 0xFC, 0x10, 0x3E, (byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5495 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5496 5497 0x01, /// from 41 5498 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5499 }; 5500 5501 CharsetProviderICU cs = new CharsetProviderICU(); 5502 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5503 CharsetDecoder decode = charset.newDecoder(); 5504 CharsetEncoder encode = charset.newEncoder(); 5505 5506 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5507 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5508 try{ 5509 // Decoding 5510 
CharBuffer decoderResult = decode.decode(decoderBuffer); 5511 5512 encoderBuffer.position(0); 5513 if(!decoderResult.equals(encoderBuffer)){ 5514 errln("Error occured while decoding "+ charset.name()); 5515 } 5516 // Encoding 5517 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5518 // RoundTrip Test 5519 ByteBuffer roundTrip = encoderResult; 5520 CharBuffer roundTripResult = decode.decode(roundTrip); 5521 5522 encoderBuffer.position(0); 5523 if(!roundTripResult.equals(encoderBuffer)){ 5524 errln("Error occured while encoding "+ charset.name()); 5525 } 5526 }catch(Exception e){ 5527 errln("Exception while converting BOCU-1 thrown: " + e); 5528 } 5529 } 5530 5531 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5532 @Test TestICUCanonicalNameConsistency()5533 public void TestICUCanonicalNameConsistency() { 5534 String[] alias = { 5535 "KSC_5601" 5536 }; 5537 String[] expected = { 5538 "windows-949-2000" 5539 }; 5540 5541 for (int i = 0; i < alias.length; i++) { 5542 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5543 if (!name.equals(expected[i])) { 5544 errln("The ICU canonical name in ICU4J does not match that in ICU4C. 
Result: " + name + "Expected: " + expected[i]); 5545 } 5546 } 5547 } 5548 5549 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5550 @Test TestCharsetICUCodeCoverage()5551 public void TestCharsetICUCodeCoverage() { 5552 CharsetProviderICU provider = new CharsetProviderICU(); 5553 5554 if (provider.charsetForName("UTF16", null) != null) { 5555 errln("charsetForName should have returned a null"); 5556 } 5557 5558 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5559 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5560 } 5561 5562 try { 5563 Charset testCharset = CharsetICU.forNameICU("bogus"); 5564 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5565 } catch (UnsupportedCharsetException ex) { 5566 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5567 } 5568 5569 Charset charset = provider.charsetForName("UTF16"); 5570 5571 try { 5572 ((CharsetICU)charset).getUnicodeSet(null, 0); 5573 } catch (IllegalArgumentException ex) { 5574 return; 5575 } 5576 errln("IllegalArgumentException should have been thrown."); 5577 } 5578 5579 @Test TestCharsetLMBCS()5580 public void TestCharsetLMBCS() { 5581 String []lmbcsNames = { 5582 "LMBCS-1", 5583 "LMBCS-2", 5584 "LMBCS-3", 5585 "LMBCS-4", 5586 "LMBCS-5", 5587 "LMBCS-6", 5588 "LMBCS-8", 5589 "LMBCS-11", 5590 "LMBCS-16", 5591 "LMBCS-17", 5592 "LMBCS-18", 5593 "LMBCS-19" 5594 }; 5595 5596 char[] src = { 5597 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5598 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5599 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5600 }; 5601 CharBuffer cbInput = CharBuffer.wrap(src); 5602 5603 CharsetProviderICU provider = new CharsetProviderICU(); 5604 5605 for (int i = 0; i < lmbcsNames.length; i++) { 5606 Charset charset = provider.charsetForName(lmbcsNames[i]); 5607 if 
(charset == null) { 5608 errln("Unable to create LMBCS charset: " + lmbcsNames[i]); 5609 return; 5610 } 5611 CharsetEncoder encoder = charset.newEncoder(); 5612 CharsetDecoder decoder = charset.newDecoder(); 5613 5614 try { 5615 cbInput.position(0); 5616 ByteBuffer bbTmp = encoder.encode(cbInput); 5617 CharBuffer cbOutput = decoder.decode(bbTmp); 5618 5619 if (!equals(cbInput, cbOutput)) { 5620 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5621 } 5622 } catch (Exception ex) { 5623 if (i >= 8) { 5624 /* Expected exceptions */ 5625 continue; 5626 } 5627 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5628 } 5629 5630 } 5631 5632 // Test malformed 5633 CoderResult malformedResult = CoderResult.UNDERFLOW; 5634 byte[] malformedBytes = { 5635 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5636 }; 5637 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5638 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5639 int[] malformedLimits = { 5640 2, 6 5641 }; 5642 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5643 for (int n = 0; n < malformedLimits.length; n++) { 5644 malformedDecoderTest.reset(); 5645 5646 malformedSrc.position(0); 5647 malformedSrc.limit(malformedLimits[n]); 5648 5649 malformedTrgt.clear(); 5650 5651 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5652 if (!malformedResult.isMalformed()) { 5653 errln("Malformed error should have resulted."); 5654 } 5655 } 5656 } 5657 5658 /* 5659 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5660 * Since there is no concept of ambiguous converters in ICU4J 5661 * this test is merely for code coverage reasons. 
5662 */ 5663 @Test TestAmbiguousConverter()5664 public void TestAmbiguousConverter() { 5665 byte [] inBytes = { 5666 0x61, 0x5b, 0x5c 5667 }; 5668 ByteBuffer src = ByteBuffer.wrap(inBytes); 5669 CharBuffer trgt = CharBuffer.allocate(20); 5670 5671 CoderResult result = CoderResult.UNDERFLOW; 5672 CharsetProviderICU provider = new CharsetProviderICU(); 5673 String[] names = CharsetProviderICU.getAllNames(); 5674 5675 for (int i = 0; i < names.length; i++) { 5676 Charset charset = provider.charsetForName(names[i]); 5677 if (charset == null) { 5678 /* We don't care about any failures because not all converters are available. */ 5679 continue; 5680 } 5681 CharsetDecoder decoder = charset.newDecoder(); 5682 5683 src.position(0); 5684 trgt.clear(); 5685 5686 result = decoder.decode(src, trgt, true); 5687 if (result.isError()) { 5688 /* We don't care about any failures. */ 5689 continue; 5690 } 5691 } 5692 } 5693 5694 @Test TestIsFixedWidth()5695 public void TestIsFixedWidth(){ 5696 String[] fixedWidth = { 5697 "US-ASCII", 5698 "UTF32", 5699 "ibm-5478_P100-1995" 5700 }; 5701 5702 String[] notFixedWidth = { 5703 "GB18030", 5704 "UTF8", 5705 "windows-949-2000", 5706 "UTF16" 5707 }; 5708 CharsetProvider provider = new CharsetProviderICU(); 5709 Charset charset; 5710 5711 for (int i = 0; i < fixedWidth.length; i++) { 5712 charset = provider.charsetForName(fixedWidth[i]); 5713 5714 if (!((CharsetICU)charset).isFixedWidth()) { 5715 errln(fixedWidth[i] + " is a fixedWidth charset but returned false."); 5716 } 5717 } 5718 5719 for (int i = 0; i < notFixedWidth.length; i++) { 5720 charset = provider.charsetForName(notFixedWidth[i]); 5721 5722 if (((CharsetICU)charset).isFixedWidth()) { 5723 errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true."); 5724 } 5725 } 5726 } 5727 5728 @Test TestBytesLengthForString()5729 public void TestBytesLengthForString() { 5730 CharsetProviderICU provider = new CharsetProviderICU(); 5731 String[] charsets = { 5732 
"windows-949-2000", 5733 "ibm-1047_P100-1995,swaplfnl", 5734 "ibm-930_P120-1999", 5735 "ISCII,version=0", 5736 "ISO_2022,locale=ko,version=0" 5737 }; 5738 5739 int[] expected = { 5740 40, 5741 20, 5742 80, /* changed from 60 to 80 to reflect the updates by #9205 */ 5743 80, 5744 160 5745 }; 5746 5747 int stringLength = 10; 5748 int length; 5749 int maxCharSize; 5750 5751 for (int i = 0; i < charsets.length; i++) { 5752 maxCharSize = (int)provider.charsetForName(charsets[i]).newEncoder().maxBytesPerChar(); 5753 length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize); 5754 5755 if (length != expected[i]) { 5756 errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length); 5757 } 5758 } 5759 } 5760 5761 /* 5762 * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when 5763 * an unmappable character occurs. 5764 * Ticket #8729 5765 */ 5766 @Test TestCharsetASCII8859BufferHandling()5767 public void TestCharsetASCII8859BufferHandling() { 5768 String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n"; 5769 String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. 
I spoke to the store operator (Ram Reddi –=|\r\n"; 5770 5771 String charsetNames[] = { 5772 "ASCII", 5773 "ISO-8859-1" 5774 }; 5775 5776 CoderResult result = CoderResult.UNDERFLOW; 5777 5778 CharsetEncoder encoder; 5779 5780 ByteBuffer outBuffer = ByteBuffer.allocate(500); 5781 CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length()); 5782 charBuffer.put(firstLine); 5783 charBuffer.put(secondLine); 5784 charBuffer.flip(); 5785 5786 for (int i = 0; i < charsetNames.length; i++) { 5787 encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder(); 5788 5789 charBuffer.position(firstLine.length()); 5790 CharBuffer charBufferSlice = charBuffer.slice(); 5791 charBufferSlice.limit(secondLine.length() - 2); 5792 5793 5794 try { 5795 result = encoder.encode(charBufferSlice, outBuffer, false); 5796 if (!result.isUnmappable()) { 5797 errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result); 5798 } 5799 } catch (IllegalArgumentException ex) { 5800 errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]); 5801 } 5802 } 5803 } 5804 5805 /* 5806 * When converting with the String method getBytes(), buffer overflow exception is thrown because 5807 * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J 5808 * side to match what the Java method is expecting. The ICU4C size will be left unchanged. 
5809 * Ticket #9205 5810 */ 5811 @Test TestBufferOverflowErrorUsingJavagetBytes()5812 public void TestBufferOverflowErrorUsingJavagetBytes() { 5813 String charsetName = "ibm-5035"; 5814 String testCase = "\u7d42"; 5815 5816 try { 5817 testCase.getBytes(charsetName); 5818 } catch (Exception ex) { 5819 errln("Error calling getBytes(): " + ex); 5820 } 5821 5822 } 5823 5824 @Test TestDefaultIgnorableCallback()5825 public void TestDefaultIgnorableCallback() { 5826 String cnv_name = "euc-jp-2007"; 5827 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5828 String pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; 5829 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5830 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5831 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5832 5833 // set callback for the converter 5834 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5835 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5836 5837 // test ignorable code points are ignored 5838 int size = set_ignorable.size(); 5839 for (int i = 0; i < size; i++) { 5840 encoder.reset(); 5841 try { 5842 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_ignorable.charAt(i)))).limit() > 0) { 5843 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(set_ignorable.charAt(i))); 5844 } 5845 } catch (Exception ex) { 5846 errln("Error received converting +" + Integer.toHexString(set_ignorable.charAt(i))); 5847 } 5848 } 5849 5850 // test non-ignorable code points are not ignored 5851 size = set_not_ignorable.size(); 5852 for (int i = 0; i < size; i++) { 5853 encoder.reset(); 5854 try { 5855 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_not_ignorable.charAt(i)))).limit() == 0) { 5856 errln("Callback should not have ignored: U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5857 } 5858 } catch (Exception ex) { 5859 errln("Error received converting U+" + 
Integer.toHexString(set_not_ignorable.charAt(i))); 5860 } 5861 } 5862 } 5863 } 5864