1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.dev.test.lang; 12 13 import java.io.BufferedReader; 14 import java.io.IOException; 15 import java.util.Arrays; 16 import java.util.Locale; 17 18 import org.junit.Test; 19 import org.junit.runner.RunWith; 20 import org.junit.runners.JUnit4; 21 22 import ohos.global.icu.dev.test.TestFmwk; 23 import ohos.global.icu.dev.test.TestUtil; 24 import ohos.global.icu.impl.Norm2AllModes; 25 import ohos.global.icu.impl.Normalizer2Impl; 26 import ohos.global.icu.impl.PatternProps; 27 import ohos.global.icu.impl.UCharacterName; 28 import ohos.global.icu.impl.Utility; 29 import ohos.global.icu.lang.CharacterProperties; 30 import ohos.global.icu.lang.UCharacter; 31 import ohos.global.icu.lang.UCharacterCategory; 32 import ohos.global.icu.lang.UCharacterDirection; 33 import ohos.global.icu.lang.UCharacterEnums; 34 import ohos.global.icu.lang.UProperty; 35 import ohos.global.icu.lang.UScript; 36 import ohos.global.icu.text.Normalizer2; 37 import ohos.global.icu.text.UTF16; 38 import ohos.global.icu.text.UnicodeSet; 39 import ohos.global.icu.text.UnicodeSetIterator; 40 import ohos.global.icu.util.CodePointMap; 41 import ohos.global.icu.util.RangeValueIterator; 42 import ohos.global.icu.util.ULocale; 43 import ohos.global.icu.util.ValueIterator; 44 import ohos.global.icu.util.VersionInfo; 45 46 47 /** 48 * Testing class for UCharacter 49 * Mostly following the test cases for ICU 50 * @author Syn Wee Quek 51 * @since nov 04 2000 52 */ 53 54 @RunWith(JUnit4.class) 55 public final class UCharacterTest extends TestFmwk 56 { 57 // private variables ============================================= 58 59 /** 60 * Expected Unicode version. 61 */ 62 private final VersionInfo VERSION_ = VersionInfo.getInstance(13); 63 64 // constructor =================================================== 65 66 /** 67 * Constructor 68 */ UCharacterTest()69 public UCharacterTest() 70 { 71 } 72 73 // public methods ================================================ 74 75 /** 76 * Testing the letter and number determination in UCharacter 77 */ 78 @Test TestLetterNumber()79 public void TestLetterNumber() 80 { 81 for (int i = 0x0041; i < 0x005B; i ++) 82 if (!UCharacter.isLetter(i)) 83 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 84 85 for (int i = 0x0660; i < 0x066A; i ++) 86 if (UCharacter.isLetter(i)) 87 errln("FAIL \\u" + hex(i) + " expected not to be a letter"); 88 89 for (int i = 0x0660; i < 0x066A; i ++) 90 if (!UCharacter.isDigit(i)) 91 errln("FAIL \\u" + hex(i) + " expected to be a digit"); 92 93 for (int i = 0x0041; i < 0x005B; i ++) 94 if (!UCharacter.isLetterOrDigit(i)) 95 errln("FAIL \\u" + hex(i) + " expected not to be a digit"); 96 97 for (int i = 0x0660; i < 0x066A; i ++) 98 if (!UCharacter.isLetterOrDigit(i)) 99 errln("FAIL \\u" + hex(i) + 100 "expected to be either a letter or a digit"); 101 102 /* 103 * The following checks work only starting from Unicode 4.0. 104 * Check the version number here. 105 */ 106 VersionInfo version = UCharacter.getUnicodeVersion(); 107 if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) { 108 return; 109 } 110 111 112 113 /* 114 * Sanity check: 115 * Verify that exactly the digit characters have decimal digit values. 116 * This assumption is used in the implementation of u_digit() 117 * (which checks nt=de) 118 * compared with the parallel java.lang.Character.digit() 119 * (which checks Nd). 120 * 121 * This was not true in Unicode 3.2 and earlier. 122 * Unicode 4.0 fixed discrepancies. 123 * Unicode 4.0.1 re-introduced problems in this area due to an 124 * unintentionally incomplete last-minute change. 125 */ 126 String digitsPattern = "[:Nd:]"; 127 String decimalValuesPattern = "[:Numeric_Type=Decimal:]"; 128 129 UnicodeSet digits, decimalValues; 130 131 digits= new UnicodeSet(digitsPattern); 132 decimalValues=new UnicodeSet(decimalValuesPattern); 133 134 135 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true); 136 137 138 } 139 140 /** 141 * Tests for space determination in UCharacter 142 */ 143 @Test TestSpaces()144 public void TestSpaces() 145 { 146 int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005}; 147 int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074}; 148 int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1 149 int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b}; 150 151 int size = spaces.length; 152 for (int i = 0; i < size; i ++) 153 { 154 if (!UCharacter.isSpaceChar(spaces[i])) 155 { 156 errln("FAIL \\u" + hex(spaces[i]) + 157 " expected to be a space character"); 158 break; 159 } 160 161 if (UCharacter.isSpaceChar(nonspaces[i])) 162 { 163 errln("FAIL \\u" + hex(nonspaces[i]) + 164 " expected not to be space character"); 165 break; 166 } 167 168 if (!UCharacter.isWhitespace(whitespaces[i])) 169 { 170 errln("FAIL \\u" + hex(whitespaces[i]) + 171 " expected to be a white space character"); 172 break; 173 } 174 if (UCharacter.isWhitespace(nonwhitespaces[i])) 175 { 176 errln("FAIL \\u" + hex(nonwhitespaces[i]) + 177 " expected not to be a space character"); 178 break; 179 } 180 logln("Ok \\u" + hex(spaces[i]) + " and \\u" + 181 hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) + 182 " and \\u" + hex(nonwhitespaces[i])); 183 } 184 185 int patternWhiteSpace[] = {0x9, 0xd, 0x20, 0x85, 186 0x200e, 0x200f, 0x2028, 0x2029}; 187 int nonPatternWhiteSpace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1, 188 0x1680, 0x1681, 0x180e, 0x180f, 189 0x1FFF, 0x2000, 0x200a, 0x200b, 190 0x2010, 0x202f, 0x2030, 0x205f, 191 0x2060, 0x3000, 0x3001}; 192 for (int i = 0; i < patternWhiteSpace.length; i ++) { 193 if (!PatternProps.isWhiteSpace(patternWhiteSpace[i])) { 194 errln("\\u" + Utility.hex(patternWhiteSpace[i], 4) 195 + " expected to be a Pattern_White_Space"); 196 } 197 } 198 for (int i = 0; i < nonPatternWhiteSpace.length; i ++) { 199 if (PatternProps.isWhiteSpace(nonPatternWhiteSpace[i])) { 200 errln("\\u" + Utility.hex(nonPatternWhiteSpace[i], 4) 201 + " expected to be a non-Pattern_White_Space"); 202 } 203 } 204 205 // TODO: propose public API for constants like uchar.h's U_GC_*_MASK 206 // (http://bugs.icu-project.org/trac/ticket/7461) 207 int GC_Z_MASK = 208 (1 << UCharacter.SPACE_SEPARATOR) | 209 (1 << UCharacter.LINE_SEPARATOR) | 210 (1 << UCharacter.PARAGRAPH_SEPARATOR); 211 212 // UCharacter.isWhitespace(c) should be the same as Character.isWhitespace(). 213 // This uses logln() because Character.isWhitespace() differs between Java versions, thus 214 // it is not necessarily an error if there is a difference between 215 // particular Java and ICU versions. 216 // However, you need to run tests with -v to see the output. 217 // Also note that, at least as of Unicode 5.2, 218 // there are no supplementary white space characters. 219 for (int c = 0; c <= 0xffff; ++c) { 220 boolean j = Character.isWhitespace(c); 221 boolean i = UCharacter.isWhitespace(c); 222 boolean u = UCharacter.isUWhiteSpace(c); 223 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 224 GC_Z_MASK) != 0; 225 if (j != i) { 226 logln(String.format( 227 "isWhitespace(U+%04x) difference: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 228 c, j, i, u, z)); 229 } else if (j || i || u || z) { 230 logln(String.format( 231 "isWhitespace(U+%04x) FYI: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 232 c, j, i, u, z)); 233 } 234 } 235 for (char c = 0; c <= 0xff; ++c) { 236 boolean j = Character.isSpace(c); 237 boolean i = UCharacter.isSpace(c); 238 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 239 GC_Z_MASK) != 0; 240 if (j != i) { 241 logln(String.format( 242 "isSpace(U+%04x) difference: JDK %5b ICU %5b Z Separator %5b", 243 (int)c, j, i, z)); 244 } else if (j || i || z) { 245 logln(String.format( 246 "isSpace(U+%04x) FYI: JDK %5b ICU %5b Z Separator %5b", 247 (int)c, j, i, z)); 248 } 249 } 250 } 251 252 /** 253 * Test various implementations of Pattern_Syntax & Pattern_White_Space. 254 */ 255 @Test TestPatternProperties()256 public void TestPatternProperties() { 257 UnicodeSet syn_pp = new UnicodeSet(); 258 UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]"); 259 UnicodeSet syn_list = new UnicodeSet( 260 "[!-/\\:-@\\[-\\^`\\{-~"+ 261 "\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+ 262 "\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+ 263 "\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]"); 264 UnicodeSet ws_pp = new UnicodeSet(); 265 UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]"); 266 UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"); 267 UnicodeSet syn_ws_pp = new UnicodeSet(); 268 UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop); 269 for(int c=0; c<=0xffff; ++c) { 270 if(PatternProps.isSyntax(c)) { 271 syn_pp.add(c); 272 } 273 if(PatternProps.isWhiteSpace(c)) { 274 ws_pp.add(c); 275 } 276 if(PatternProps.isSyntaxOrWhiteSpace(c)) { 277 syn_ws_pp.add(c); 278 } 279 } 280 compareUSets(syn_pp, syn_prop, 281 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", true); 282 compareUSets(syn_pp, syn_list, 283 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true); 284 compareUSets(ws_pp, ws_prop, 285 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true); 286 compareUSets(ws_pp, ws_list, 287 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true); 288 compareUSets(syn_ws_pp, syn_ws_prop, 289 "PatternProps.isSyntaxOrWhiteSpace()", 290 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", true); 291 } 292 293 /** 294 * Tests for defined and undefined characters 295 */ 296 @Test TestDefined()297 public void TestDefined() 298 { 299 int undefined[] = {0xfff1, 0xfff7, 0xfa6e}; 300 int defined[] = {0x523E, 0x004f88, 0x00fffd}; 301 302 int size = undefined.length; 303 for (int i = 0; i < size; i ++) 304 { 305 if (UCharacter.isDefined(undefined[i])) 306 { 307 errln("FAIL \\u" + hex(undefined[i]) + 308 " expected not to be defined"); 309 break; 310 } 311 if (!UCharacter.isDefined(defined[i])) 312 { 313 errln("FAIL \\u" + hex(defined[i]) + " expected defined"); 314 break; 315 } 316 } 317 } 318 319 /** 320 * Tests for base characters and their cellwidth 321 */ 322 @Test TestBase()323 public void TestBase() 324 { 325 int base[] = {0x0061, 0x000031, 0x0003d2}; 326 int nonbase[] = {0x002B, 0x000020, 0x00203B}; 327 int size = base.length; 328 for (int i = 0; i < size; i ++) 329 { 330 if (UCharacter.isBaseForm(nonbase[i])) 331 { 332 errln("FAIL \\u" + hex(nonbase[i]) + 333 " expected not to be a base character"); 334 break; 335 } 336 if (!UCharacter.isBaseForm(base[i])) 337 { 338 errln("FAIL \\u" + hex(base[i]) + 339 " expected to be a base character"); 340 break; 341 } 342 } 343 } 344 345 /** 346 * Tests for digit characters 347 */ 348 @Test TestDigits()349 public void TestDigits() 350 { 351 int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160}; 352 353 //special characters not in the properties table 354 int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8, 355 0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d}; 356 int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE}; 357 358 int digitvalues[] = {0, 2, 3, 5, 1}; 359 int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; 360 361 int size = digits.length; 362 for (int i = 0; i < size; i ++) { 363 if (UCharacter.isDigit(digits[i]) && 364 UCharacter.digit(digits[i]) != digitvalues[i]) 365 { 366 errln("FAIL \\u" + hex(digits[i]) + 367 " expected digit with value " + digitvalues[i]); 368 break; 369 } 370 } 371 size = nondigits.length; 372 for (int i = 0; i < size; i ++) 373 if (UCharacter.isDigit(nondigits[i])) 374 { 375 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit"); 376 break; 377 } 378 379 size = digits2.length; 380 for (int i = 0; i < 10; i ++) { 381 if (UCharacter.isDigit(digits2[i]) && 382 UCharacter.digit(digits2[i]) != digitvalues2[i]) 383 { 384 errln("FAIL \\u" + hex(digits2[i]) + 385 " expected digit with value " + digitvalues2[i]); 386 break; 387 } 388 } 389 } 390 391 /** 392 * Tests for numeric characters 393 */ 394 @Test TestNumeric()395 public void TestNumeric() 396 { 397 if (UCharacter.getNumericValue(0x00BC) != -2) { 398 errln("Numeric value of 0x00BC expected to be -2"); 399 } 400 401 for (int i = '0'; i < '9'; i ++) { 402 int n1 = UCharacter.getNumericValue(i); 403 double n2 = UCharacter.getUnicodeNumericValue(i); 404 if (n1 != n2 || n1 != (i - '0')) { 405 errln("Numeric value of " + (char)i + " expected to be " + 406 (i - '0')); 407 } 408 } 409 for (int i = 'A'; i < 'F'; i ++) { 410 int n1 = UCharacter.getNumericValue(i); 411 double n2 = UCharacter.getUnicodeNumericValue(i); 412 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) { 413 errln("Numeric value of " + (char)i + " expected to be " + 414 (i - 'A' + 10)); 415 } 416 } 417 for (int i = 0xFF21; i < 0xFF26; i ++) { 418 // testing full wideth latin characters A-F 419 int n1 = UCharacter.getNumericValue(i); 420 double n2 = UCharacter.getUnicodeNumericValue(i); 421 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) { 422 errln("Numeric value of " + (char)i + " expected to be " + 423 (i - 0xFF21 + 10)); 424 } 425 } 426 // testing han numbers 427 int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3, 428 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7, 429 0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10, 430 0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000, 431 0x824c, 10000, 0x5104, 100000000}; 432 for (int i = 0; i < han.length; i += 2) { 433 if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) { 434 errln("Numeric value of \\u" + 435 Integer.toHexString(han[i]) + " expected to be " + 436 han[i + 1]); 437 } 438 } 439 } 440 441 /** 442 * Tests for version 443 */ 444 @Test TestVersion()445 public void TestVersion() 446 { 447 if (!UCharacter.getUnicodeVersion().equals(VERSION_)) 448 errln("FAIL expected: " + VERSION_ + " got: " + UCharacter.getUnicodeVersion()); 449 } 450 451 /** 452 * Tests for control characters 453 */ 454 @Test TestISOControl()455 public void TestISOControl() 456 { 457 int control[] = {0x001b, 0x000097, 0x000082}; 458 int noncontrol[] = {0x61, 0x000031, 0x0000e2}; 459 460 int size = control.length; 461 for (int i = 0; i < size; i ++) 462 { 463 if (!UCharacter.isISOControl(control[i])) 464 { 465 errln("FAIL 0x" + Integer.toHexString(control[i]) + 466 " expected to be a control character"); 467 break; 468 } 469 if (UCharacter.isISOControl(noncontrol[i])) 470 { 471 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) + 472 " expected to be not a control character"); 473 break; 474 } 475 476 logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" + 477 Integer.toHexString(noncontrol[i])); 478 } 479 } 480 481 /** 482 * Test Supplementary 483 */ 484 @Test TestSupplementary()485 public void TestSupplementary() 486 { 487 for (int i = 0; i < 0x10000; i ++) { 488 if (UCharacter.isSupplementary(i)) { 489 errln("Codepoint \\u" + Integer.toHexString(i) + 490 " is not supplementary"); 491 } 492 } 493 for (int i = 0x10000; i < 0x10FFFF; i ++) { 494 if (!UCharacter.isSupplementary(i)) { 495 errln("Codepoint \\u" + Integer.toHexString(i) + 496 " is supplementary"); 497 } 498 } 499 } 500 501 /** 502 * Test mirroring 503 */ 504 @Test TestMirror()505 public void TestMirror() 506 { 507 if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) && 508 UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a) 509 && !UCharacter.isMirrored(0x27) && 510 !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284) 511 && !UCharacter.isMirrored(0x3400))) { 512 errln("isMirrored() does not work correctly"); 513 } 514 515 if (!(UCharacter.getMirror(0x3c) == 0x3e && 516 UCharacter.getMirror(0x5d) == 0x5b && 517 UCharacter.getMirror(0x208d) == 0x208e && 518 UCharacter.getMirror(0x3017) == 0x3016 && 519 520 UCharacter.getMirror(0xbb) == 0xab && 521 UCharacter.getMirror(0x2215) == 0x29F5 && 522 UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */ 523 524 UCharacter.getMirror(0x2e) == 0x2e && 525 UCharacter.getMirror(0x6f3) == 0x6f3 && 526 UCharacter.getMirror(0x301c) == 0x301c && 527 UCharacter.getMirror(0xa4ab) == 0xa4ab && 528 529 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 530 UCharacter.getMirror(0x2018) == 0x2018 && 531 UCharacter.getMirror(0x201b) == 0x201b && 532 UCharacter.getMirror(0x301d) == 0x301d)) { 533 errln("getMirror() does not work correctly"); 534 } 535 536 /* verify that Bidi_Mirroring_Glyph roundtrips */ 537 UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]"); 538 UnicodeSetIterator iter=new UnicodeSetIterator(set); 539 int start, end, c2, c3; 540 while(iter.nextRange() && (start=iter.codepoint)>=0) { 541 end=iter.codepointEnd; 542 do { 543 c2=UCharacter.getMirror(start); 544 c3=UCharacter.getMirror(c2); 545 if(c3!=start) { 546 errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3)); 547 } 548 c3=UCharacter.getBidiPairedBracket(start); 549 if(UCharacter.getIntPropertyValue(start, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE) { 550 if(c3!=start) { 551 errln("u_getBidiPairedBracket(U+"+hex(start)+") != self for bpt(c)==None"); 552 } 553 } else { 554 if(c3!=c2) { 555 errln("u_getBidiPairedBracket(U+"+hex(start)+") != U+"+hex(c2)+" = bmg(c)'"); 556 } 557 } 558 } while(++start<=end); 559 } 560 561 // verify that Unicode Corrigendum #6 reverts mirrored status of the following 562 if (UCharacter.isMirrored(0x2018) || 563 UCharacter.isMirrored(0x201d) || 564 UCharacter.isMirrored(0x201f) || 565 UCharacter.isMirrored(0x301e)) { 566 errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property"); 567 } 568 } 569 570 /** 571 * Tests for printable characters 572 */ 573 @Test TestPrint()574 public void TestPrint() 575 { 576 int printable[] = {0x0042, 0x00005f, 0x002014}; 577 int nonprintable[] = {0x200c, 0x00009f, 0x00001b}; 578 579 int size = printable.length; 580 for (int i = 0; i < size; i ++) 581 { 582 if (!UCharacter.isPrintable(printable[i])) 583 { 584 errln("FAIL \\u" + hex(printable[i]) + 585 " expected to be a printable character"); 586 break; 587 } 588 if (UCharacter.isPrintable(nonprintable[i])) 589 { 590 errln("FAIL \\u" + hex(nonprintable[i]) + 591 " expected not to be a printable character"); 592 break; 593 } 594 logln("Ok \\u" + hex(printable[i]) + " and \\u" + 595 hex(nonprintable[i])); 596 } 597 598 // test all ISO 8 controls 599 for (int ch = 0; ch <= 0x9f; ++ ch) { 600 if (ch == 0x20) { 601 // skip ASCII graphic characters and continue with DEL 602 ch = 0x7f; 603 } 604 if (UCharacter.isPrintable(ch)) { 605 errln("Fail \\u" + hex(ch) + 606 " is a ISO 8 control character hence not printable\n"); 607 } 608 } 609 610 /* test all Latin-1 graphic characters */ 611 for (int ch = 0x20; ch <= 0xff; ++ ch) { 612 if (ch == 0x7f) { 613 ch = 0xa0; 614 } 615 if (!UCharacter.isPrintable(ch) 616 && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) { 617 errln("Fail \\u" + hex(ch) + 618 " is a Latin-1 graphic character\n"); 619 } 620 } 621 } 622 623 /** 624 * Testing for identifier characters 625 */ 626 @Test TestIdentifier()627 public void TestIdentifier() 628 { 629 int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061}; 630 int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019}; 631 int unicodeidpart[] = {0x005f, 0x000032, 0x000045}; 632 int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020}; 633 int idignore[] = {0x0006, 0x0010, 0x206b}; 634 int nonidignore[] = {0x0075, 0x0000a3, 0x000061}; 635 636 int size = unicodeidstart.length; 637 for (int i = 0; i < size; i ++) 638 { 639 if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i])) 640 { 641 errln("FAIL \\u" + hex(unicodeidstart[i]) + 642 " expected to be a unicode identifier start character"); 643 break; 644 } 645 if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i])) 646 { 647 errln("FAIL \\u" + hex(nonunicodeidstart[i]) + 648 " expected not to be a unicode identifier start " + 649 "character"); 650 break; 651 } 652 if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i])) 653 { 654 errln("FAIL \\u" + hex(unicodeidpart[i]) + 655 " expected to be a unicode identifier part character"); 656 break; 657 } 658 if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i])) 659 { 660 errln("FAIL \\u" + hex(nonunicodeidpart[i]) + 661 " expected not to be a unicode identifier part " + 662 "character"); 663 break; 664 } 665 if (!UCharacter.isIdentifierIgnorable(idignore[i])) 666 { 667 errln("FAIL \\u" + hex(idignore[i]) + 668 " expected to be a ignorable unicode character"); 669 break; 670 } 671 if (UCharacter.isIdentifierIgnorable(nonidignore[i])) 672 { 673 errln("FAIL \\u" + hex(nonidignore[i]) + 674 " expected not to be a ignorable unicode character"); 675 break; 676 } 677 logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" + 678 hex(nonunicodeidstart[i]) + " and \\u" + 679 hex(unicodeidpart[i]) + " and \\u" + 680 hex(nonunicodeidpart[i]) + " and \\u" + 681 hex(idignore[i]) + " and \\u" + hex(nonidignore[i])); 682 } 683 } 684 685 /** 686 * Tests for the character types, direction.<br> 687 * This method reads in UnicodeData.txt file for testing purposes. A 688 * default path is provided relative to the src path, however the user 689 * could set a system property to change the directory path.<br> 690 * e.g. java -DUnicodeData="data_directory_path" 691 * ohos.global.icu.dev.test.lang.UCharacterTest 692 */ 693 @Test TestUnicodeData()694 public void TestUnicodeData() 695 { 696 // this is the 2 char category types used in the UnicodeData file 697 final String TYPE = 698 "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf"; 699 700 // directionality types used in the UnicodeData file 701 // padded by spaces to make each type size 4 702 final String DIR = 703 "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI "; 704 705 Normalizer2 nfc = Normalizer2.getNFCInstance(); 706 Normalizer2 nfkc = Normalizer2.getNFKCInstance(); 707 708 BufferedReader input = null; 709 try { 710 input = TestUtil.getDataReader("unicode/UnicodeData.txt"); 711 int numErrors = 0; 712 713 for (;;) { 714 String s = input.readLine(); 715 if(s == null) { 716 break; 717 } 718 if(s.length()<4 || s.startsWith("#")) { 719 continue; 720 } 721 String[] fields = s.split(";", -1); 722 assert (fields.length == 15 ) : "Number of fields is " + fields.length + ": " + s; 723 724 int ch = Integer.parseInt(fields[0], 16); 725 726 // testing the general category 727 int type = TYPE.indexOf(fields[2]); 728 if (type < 0) 729 type = 0; 730 else 731 type = (type >> 1) + 1; 732 if (UCharacter.getType(ch) != type) 733 { 734 errln("FAIL \\u" + hex(ch) + " expected type " + type); 735 break; 736 } 737 738 if (UCharacter.getIntPropertyValue(ch, 739 UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) { 740 errln("error: getIntPropertyValue(\\u" + 741 Integer.toHexString(ch) + 742 ", UProperty.GENERAL_CATEGORY_MASK) != " + 743 "getMask(getType(ch))"); 744 } 745 746 // testing combining class 747 int cc = Integer.parseInt(fields[3]); 748 if (UCharacter.getCombiningClass(ch) != cc) 749 { 750 errln("FAIL \\u" + hex(ch) + " expected combining " + 751 "class " + cc); 752 break; 753 } 754 if (nfkc.getCombiningClass(ch) != cc) 755 { 756 errln("FAIL \\u" + hex(ch) + " expected NFKC combining " + 757 "class " + cc); 758 break; 759 } 760 761 // testing the direction 762 String d = fields[4]; 763 if (d.length() == 1) 764 d = d + " "; 765 766 int dir = DIR.indexOf(d) >> 2; 767 if (UCharacter.getDirection(ch) != dir) 768 { 769 errln("FAIL \\u" + hex(ch) + 770 " expected direction " + dir + " but got " + UCharacter.getDirection(ch)); 771 break; 772 } 773 774 byte bdir = (byte)dir; 775 if (UCharacter.getDirectionality(ch) != bdir) 776 { 777 errln("FAIL \\u" + hex(ch) + 778 " expected directionality " + bdir + " but got " + 779 UCharacter.getDirectionality(ch)); 780 break; 781 } 782 783 /* get Decomposition_Type & Decomposition_Mapping, field 5 */ 784 int dt; 785 if(fields[5].length()==0) { 786 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */ 787 if(ch==0xac00 || ch==0xd7a3) { 788 dt=UCharacter.DecompositionType.CANONICAL; 789 } else { 790 dt=UCharacter.DecompositionType.NONE; 791 } 792 } else { 793 d=fields[5]; 794 dt=-1; 795 if(d.charAt(0)=='<') { 796 int end=d.indexOf('>', 1); 797 if(end>=0) { 798 dt=UCharacter.getPropertyValueEnum(UProperty.DECOMPOSITION_TYPE, d.substring(1, end)); 799 while(d.charAt(++end)==' ') {} // skip spaces 800 d=d.substring(end); 801 } 802 } else { 803 dt=UCharacter.DecompositionType.CANONICAL; 804 } 805 } 806 String dm; 807 if(dt>UCharacter.DecompositionType.NONE) { 808 if(ch==0xac00) { 809 dm="\u1100\u1161"; 810 } else if(ch==0xd7a3) { 811 dm="\ud788\u11c2"; 812 } else { 813 String[] dmChars=d.split(" +"); 814 StringBuilder dmb=new StringBuilder(dmChars.length); 815 for(String dmc : dmChars) { 816 dmb.appendCodePoint(Integer.parseInt(dmc, 16)); 817 } 818 dm=dmb.toString(); 819 } 820 } else { 821 dm=null; 822 } 823 if(dt<0) { 824 errln(String.format("error in UnicodeData.txt: syntax error in U+%04x decomposition field", ch)); 825 return; 826 } 827 int i=UCharacter.getIntPropertyValue(ch, UProperty.DECOMPOSITION_TYPE); 828 assertEquals( 829 String.format("error: UCharacter.getIntPropertyValue(U+%04x, UProperty.DECOMPOSITION_TYPE) is wrong", ch), 830 dt, i); 831 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ 832 String mapping=nfkc.getRawDecomposition(ch); 833 assertEquals( 834 String.format("error: nfkc.getRawDecomposition(U+%04x) is wrong", ch), 835 dm, mapping); 836 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */ 837 if(dt!=UCharacter.DecompositionType.CANONICAL) { 838 dm=null; 839 } 840 mapping=nfc.getRawDecomposition(ch); 841 assertEquals( 842 String.format("error: nfc.getRawDecomposition(U+%04x) is wrong", ch), 843 dm, mapping); 844 /* recompose */ 845 if(dt==UCharacter.DecompositionType.CANONICAL 846 && !UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) { 847 int a=dm.codePointAt(0); 848 int b=dm.codePointBefore(dm.length()); 849 int composite=nfc.composePair(a, b); 850 assertEquals( 851 String.format( 852 "error: nfc U+%04X decomposes to U+%04X+U+%04X "+ 853 "but does not compose back (instead U+%04X)", 854 ch, a, b, composite), 855 ch, composite); 856 /* 857 * Note: NFKC has fewer round-trip mappings than NFC, 858 * so we can't just test nfkc.composePair(a, b) here without further data. 859 */ 860 } 861 862 // testing iso comment 863 try{ 864 String isocomment = fields[11]; 865 String comment = UCharacter.getISOComment(ch); 866 if (comment == null) { 867 comment = ""; 868 } 869 if (!comment.equals(isocomment)) { 870 errln("FAIL \\u" + hex(ch) + 871 " expected iso comment " + isocomment); 872 break; 873 } 874 }catch(Exception e){ 875 if(e.getMessage().indexOf("unames.icu") >= 0){ 876 numErrors++; 877 }else{ 878 throw e; 879 } 880 } 881 882 String upper = fields[12]; 883 int tempchar = ch; 884 if (upper.length() > 0) { 885 tempchar = Integer.parseInt(upper, 16); 886 } 887 int resultCp = UCharacter.toUpperCase(ch); 888 if (resultCp != tempchar) { 889 errln("FAIL \\u" + Utility.hex(ch, 4) 890 + " expected uppercase \\u" 891 + Utility.hex(tempchar, 4) 892 + " but got \\u" 893 + Utility.hex(resultCp, 4)); 894 break; 895 } 896 897 String lower = fields[13]; 898 tempchar = ch; 899 if (lower.length() > 0) { 900 tempchar = Integer.parseInt(lower, 16); 901 } 902 if (UCharacter.toLowerCase(ch) != tempchar) { 903 errln("FAIL \\u" + Utility.hex(ch, 4) 904 + " expected lowercase \\u" 905 + Utility.hex(tempchar, 4)); 906 break; 907 } 908 909 910 911 String title = fields[14]; 912 tempchar = ch; 913 if (title.length() > 0) { 914 tempchar = Integer.parseInt(title, 16); 915 } 916 if (UCharacter.toTitleCase(ch) != tempchar) { 917 errln("FAIL \\u" + Utility.hex(ch, 4) 918 + " expected titlecase \\u" 919 + Utility.hex(tempchar, 4)); 920 break; 921 } 922 } 923 if(numErrors > 0){ 924 warnln("Could not find unames.icu"); 925 } 926 } catch (Exception e) { 927 e.printStackTrace(); 928 } finally { 929 if (input != null) { 930 try { 931 input.close(); 932 } catch (IOException ignored) { 933 } 934 } 935 } 936 937 if (UCharacter.UnicodeBlock.of(0x0041) 938 != UCharacter.UnicodeBlock.BASIC_LATIN 939 || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK) 940 != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) { 941 errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! " 942 + "Expected : " 943 + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got " 944 + UCharacter.UnicodeBlock.of(0x0041)); 945 } 946 947 // sanity check on repeated properties 948 for (int ch = 0xfffe; ch <= 0x10ffff;) { 949 int type = UCharacter.getType(ch); 950 if (UCharacter.getIntPropertyValue(ch, 951 UProperty.GENERAL_CATEGORY_MASK) 952 != (1 << type)) { 953 errln("error: UCharacter.getIntPropertyValue(\\u" 954 + Integer.toHexString(ch) 955 + ", UProperty.GENERAL_CATEGORY_MASK) != " 956 + "getMask(getType())"); 957 } 958 if (type != UCharacterCategory.UNASSIGNED) { 959 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4) 960 + " != UCharacterCategory.UNASSIGNED (returns " 961 + UCharacterCategory.toString(UCharacter.getType(ch)) 962 + ")"); 963 } 964 if ((ch & 0xffff) == 0xfffe) { 965 ++ ch; 966 } 967 else { 968 ch += 0xffff; 969 } 970 } 971 972 // test that PUA is not "unassigned" 973 for(int ch = 0xe000; ch <= 0x10fffd;) { 974 int type = UCharacter.getType(ch); 975 if (UCharacter.getIntPropertyValue(ch, 976 UProperty.GENERAL_CATEGORY_MASK) 977 != (1 << type)) { 978 errln("error: UCharacter.getIntPropertyValue(\\u" 979 + Integer.toHexString(ch) 980 + ", UProperty.GENERAL_CATEGORY_MASK) != " 981 + "getMask(getType())"); 982 } 983 984 if (type == UCharacterCategory.UNASSIGNED) { 985 errln("error: UCharacter.getType(\\u" 986 + Utility.hex(ch, 4) 987 + ") == UCharacterCategory.UNASSIGNED"); 988 } 989 else if (type != UCharacterCategory.PRIVATE_USE) { 990 logln("PUA override: UCharacter.getType(\\u" 991 + Utility.hex(ch, 4) + ")=" + type); 992 } 993 if (ch == 0xf8ff) { 994 ch = 0xf0000; 995 } 996 else if (ch == 0xffffd) { 997 ch = 0x100000; 998 } 999 else { 1000 ++ ch; 1001 } 1002 } 1003 } 1004 1005 1006 /** 1007 * Test for the character names 1008 */ 1009 @Test TestNames()1010 public void TestNames() 1011 { 1012 try{ 1013 int length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1014 if (length < 83) { // Unicode 3.2 max char name length 1015 errln("getMaxCharNameLength()=" + length + " is too short"); 1016 } 1017 1018 int c[] = {0x0061, //LATIN SMALL LETTER A 1019 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK 1020 0x003401, //CJK UNIFIED IDEOGRAPH-3401 1021 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED 1022 0x00ac00, //HANGUL SYLLABLE GA 1023 0x00d7a3, //HANGUL SYLLABLE HIH 1024 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A 1025 0xff08, //FULLWIDTH LEFT PARENTHESIS 1026 0x00ffe5, //FULLWIDTH YEN SIGN 1027 0x00ffff, //null 1028 0x0023456 //CJK UNIFIED IDEOGRAPH-23456 1029 }; 1030 String name[] = { 1031 "LATIN SMALL LETTER A", 1032 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1033 "CJK UNIFIED IDEOGRAPH-3401", 1034 "CJK UNIFIED IDEOGRAPH-7FED", 1035 "HANGUL SYLLABLE GA", 1036 "HANGUL SYLLABLE HIH", 1037 "", 1038 "", 1039 "FULLWIDTH LEFT PARENTHESIS", 1040 "FULLWIDTH YEN SIGN", 1041 "", 1042 "CJK UNIFIED IDEOGRAPH-23456" 1043 }; 1044 String oldname[] = {"", "", "", 1045 "", 1046 "", "", "", "", "", "", 1047 "", ""}; 1048 String extendedname[] = {"LATIN SMALL LETTER A", 1049 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1050 "CJK UNIFIED IDEOGRAPH-3401", 1051 "CJK UNIFIED IDEOGRAPH-7FED", 1052 "HANGUL SYLLABLE GA", 1053 "HANGUL SYLLABLE HIH", 1054 "<lead surrogate-D800>", 1055 "<trail surrogate-DC00>", 1056 "FULLWIDTH LEFT PARENTHESIS", 1057 "FULLWIDTH YEN SIGN", 1058 "<noncharacter-FFFF>", 1059 "CJK UNIFIED IDEOGRAPH-23456"}; 1060 1061 int size = c.length; 1062 String str; 1063 int uc; 1064 1065 for (int i = 0; i < size; i ++) 1066 { 1067 // modern Unicode character name 1068 str = UCharacter.getName(c[i]); 1069 if ((str == null && name[i].length() > 0) || 1070 (str != null && !str.equals(name[i]))) 1071 { 1072 errln("FAIL \\u" + hex(c[i]) + " expected name " + 1073 name[i]); 1074 break; 1075 } 1076 1077 // 1.0 Unicode character name 1078 str = UCharacter.getName1_0(c[i]); 1079 if ((str == null && oldname[i].length() > 0) || 1080 (str != null && !str.equals(oldname[i]))) 1081 { 1082 errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " + 1083 oldname[i]); 1084 break; 1085 } 1086 1087 // extended character name 1088 str = UCharacter.getExtendedName(c[i]); 1089 if (str == null || !str.equals(extendedname[i])) 1090 { 1091 errln("FAIL \\u" + hex(c[i]) + " expected extended name " + 1092 extendedname[i]); 1093 break; 1094 } 1095 1096 // retrieving unicode character from modern name 1097 uc = UCharacter.getCharFromName(name[i]); 1098 if (uc != c[i] && name[i].length() != 0) 1099 { 1100 errln("FAIL " + name[i] + " expected character \\u" + 1101 hex(c[i])); 1102 break; 1103 } 1104 1105 //retrieving unicode character from 1.0 name 1106 uc = UCharacter.getCharFromName1_0(oldname[i]); 1107 if (uc != c[i] && oldname[i].length() != 0) 1108 { 1109 errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + 1110 hex(c[i])); 1111 break; 1112 } 1113 1114 //retrieving unicode character from 1.0 name 1115 uc = UCharacter.getCharFromExtendedName(extendedname[i]); 1116 if (uc != c[i] && i != 0 && (i == 1 || i == 6)) 1117 { 1118 errln("FAIL " + extendedname[i] + 1119 " expected extended character \\u" + hex(c[i])); 1120 break; 1121 } 1122 } 1123 1124 // test getName works with mixed-case names (new in 2.0) 1125 if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) { 1126 errln("FAIL: 'LATin smALl letTER A' should result in character " 1127 + "U+0061"); 1128 } 1129 1130 if (TestFmwk.getExhaustiveness() >= 5) { 1131 // extra testing different from icu 1132 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++) 1133 { 1134 str = UCharacter.getName(i); 1135 if (str != null && UCharacter.getCharFromName(str) != i) 1136 { 1137 errln("FAIL \\u" + hex(i) + " " + str + 1138 " retrieval of name and vice versa" ); 1139 break; 1140 } 1141 } 1142 } 1143 1144 // Test getCharNameCharacters 1145 if (TestFmwk.getExhaustiveness() >= 10) { 1146 boolean map[] = new boolean[256]; 1147 1148 UnicodeSet set = new UnicodeSet(1, 0); // empty set 1149 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set 1150 1151 // uprv_getCharNameCharacters() will likely return more lowercase 1152 // letters than actual character names contain because 1153 // it includes all the characters in lowercased names of 1154 // general categories, for the full possible set of extended names. 1155 UCharacterName.INSTANCE.getCharNameCharacters(set); 1156 1157 // build set the dumb (but sure-fire) way 1158 Arrays.fill(map, false); 1159 1160 int maxLength = 0; 1161 for (int cp = 0; cp < 0x110000; ++ cp) { 1162 String n = UCharacter.getExtendedName(cp); 1163 int len = n.length(); 1164 if (len > maxLength) { 1165 maxLength = len; 1166 } 1167 1168 for (int i = 0; i < len; ++ i) { 1169 char ch = n.charAt(i); 1170 if (!map[ch & 0xff]) { 1171 dumb.add(ch); 1172 map[ch & 0xff] = true; 1173 } 1174 } 1175 } 1176 1177 length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1178 if (length != maxLength) { 1179 errln("getMaxCharNameLength()=" + length 1180 + " differs from the maximum length " + maxLength 1181 + " of all extended names"); 1182 } 1183 1184 // compare the sets. Where is my uset_equals?!! 1185 boolean ok = true; 1186 for (int i = 0; i < 256; ++ i) { 1187 if (set.contains(i) != dumb.contains(i)) { 1188 if (0x61 <= i && i <= 0x7a // a-z 1189 && set.contains(i) && !dumb.contains(i)) { 1190 // ignore lowercase a-z that are in set but not in dumb 1191 ok = true; 1192 } 1193 else { 1194 ok = false; 1195 break; 1196 } 1197 } 1198 } 1199 1200 String pattern1 = set.toPattern(true); 1201 String pattern2 = dumb.toPattern(true); 1202 1203 if (!ok) { 1204 errln("FAIL: getCharNameCharacters() returned " + pattern1 1205 + " expected " + pattern2 1206 + " (too many lowercase a-z are ok)"); 1207 } else { 1208 logln("Ok: getCharNameCharacters() returned " + pattern1); 1209 } 1210 } 1211 // improve code coverage 1212 String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+ 1213 "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+ 1214 "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+ 1215 "FULLWIDTH YEN SIGN|"+ 1216 "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name! 1217 "CJK UNIFIED IDEOGRAPH-23456"; 1218 String separator= "|"; 1219 String source = Utility.valueOf(c); 1220 String result = UCharacter.getName(source, separator); 1221 if(!result.equals(expected)){ 1222 errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result); 1223 } 1224 1225 }catch(IllegalArgumentException e){ 1226 if(e.getMessage().indexOf("unames.icu") >= 0){ 1227 warnln("Could not find unames.icu"); 1228 }else{ 1229 throw e; 1230 } 1231 } 1232 1233 } 1234 1235 @Test TestUCharFromNameUnderflow()1236 public void TestUCharFromNameUnderflow() { 1237 // Ticket #10889: Underflow crash when there is no dash. 1238 String name = "<NO BREAK SPACE>"; 1239 int c = UCharacter.getCharFromExtendedName(name); 1240 if(c >= 0) { 1241 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1242 " but should fail (-1)"); 1243 } 1244 1245 // Test related edge cases. 1246 name = "<-00a0>"; 1247 c = UCharacter.getCharFromExtendedName(name); 1248 if(c >= 0) { 1249 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1250 " but should fail (-1)"); 1251 } 1252 1253 name = "<control->"; 1254 c = UCharacter.getCharFromExtendedName(name); 1255 if(c >= 0) { 1256 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1257 " but should fail (-1)"); 1258 } 1259 1260 name = "<control-111111>"; 1261 c = UCharacter.getCharFromExtendedName(name); 1262 if(c >= 0) { 1263 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1264 " but should fail (-1)"); 1265 } 1266 1267 // ICU-20292: integer overflow 1268 name = "<noncharacter-10010FFFF>"; 1269 c = UCharacter.getCharFromExtendedName(name); 1270 if(c >= 0) { 1271 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1272 " but should fail (-1)"); 1273 } 1274 1275 name = "<noncharacter-00010FFFF>"; // too many digits even if only leading 0s 1276 c = UCharacter.getCharFromExtendedName(name); 1277 if(c >= 0) { 1278 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1279 " but should fail (-1)"); 1280 } 1281 1282 name = "<noncharacter-fFFf>>"; 1283 c = UCharacter.getCharFromExtendedName(name); 1284 if(c >= 0) { 1285 errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + 1286 " but should fail (-1)"); 1287 } 1288 } 1289 1290 /** 1291 * Testing name iteration 1292 */ 1293 @Test TestNameIteration()1294 public void TestNameIteration()throws Exception 1295 { 1296 try { 1297 ValueIterator iterator = UCharacter.getExtendedNameIterator(); 1298 ValueIterator.Element element = new ValueIterator.Element(); 1299 ValueIterator.Element old = new ValueIterator.Element(); 1300 // testing subrange 1301 iterator.setRange(-10, -5); 1302 if (iterator.next(element)) { 1303 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1304 } 1305 iterator.setRange(0x110000, 0x111111); 1306 if (iterator.next(element)) { 1307 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1308 } 1309 try { 1310 iterator.setRange(50, 10); 1311 errln("Fail, expected exception when encountered invalid range"); 1312 } catch (Exception e) { 1313 } 1314 1315 iterator.setRange(-10, 10); 1316 if (!iterator.next(element) || element.integer != 0) { 1317 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range"); 1318 } 1319 1320 iterator.setRange(0x10FFFE, 0x200000); 1321 int last = 0; 1322 while (iterator.next(element)) { 1323 last = element.integer; 1324 } 1325 if (last != 0x10FFFF) { 1326 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range"); 1327 } 1328 1329 iterator = UCharacter.getNameIterator(); 1330 iterator.setRange(0xF, 0x45); 1331 while (iterator.next(element)) { 1332 if (element.integer <= old.integer) { 1333 errln("FAIL next returned a less codepoint \\u" + 1334 Integer.toHexString(element.integer) + " than \\u" + 1335 Integer.toHexString(old.integer)); 1336 break; 1337 } 1338 if (!UCharacter.getName(element.integer).equals(element.value)) 1339 { 1340 errln("FAIL next codepoint \\u" + 1341 Integer.toHexString(element.integer) + 1342 " does not have the expected name " + 1343 UCharacter.getName(element.integer) + 1344 " instead have the name " + (String)element.value); 1345 break; 1346 } 1347 old.integer = element.integer; 1348 } 1349 1350 iterator.reset(); 1351 iterator.next(element); 1352 if (element.integer != 0x20) { 1353 errln("FAIL reset in iterator"); 1354 } 1355 1356 iterator.setRange(0, 0x110000); 1357 old.integer = 0; 1358 while (iterator.next(element)) { 1359 if (element.integer != 0 && element.integer <= old.integer) { 1360 errln("FAIL next returned a less codepoint \\u" + 1361 Integer.toHexString(element.integer) + " than \\u" + 1362 Integer.toHexString(old.integer)); 1363 break; 1364 } 1365 if (!UCharacter.getName(element.integer).equals(element.value)) 1366 { 1367 errln("FAIL next codepoint \\u" + 1368 Integer.toHexString(element.integer) + 1369 " does not have the expected name " + 1370 UCharacter.getName(element.integer) + 1371 " instead have the name " + (String)element.value); 1372 break; 1373 } 1374 for (int i = old.integer + 1; i < element.integer; i ++) { 1375 if (UCharacter.getName(i) != null) { 1376 errln("FAIL between codepoints are not null \\u" + 1377 Integer.toHexString(old.integer) + " and " + 1378 Integer.toHexString(element.integer) + " has " + 1379 Integer.toHexString(i) + " with a name " + 1380 UCharacter.getName(i)); 1381 break; 1382 } 1383 } 1384 old.integer = element.integer; 1385 } 1386 1387 iterator = UCharacter.getExtendedNameIterator(); 1388 old.integer = 0; 1389 while (iterator.next(element)) { 1390 if (element.integer != 0 && element.integer != old.integer) { 1391 errln("FAIL next returned a codepoint \\u" + 1392 Integer.toHexString(element.integer) + 1393 " different from \\u" + 1394 Integer.toHexString(old.integer)); 1395 break; 1396 } 1397 if (!UCharacter.getExtendedName(element.integer).equals( 1398 element.value)) { 1399 errln("FAIL next codepoint \\u" + 1400 Integer.toHexString(element.integer) + 1401 " name should be " 1402 + UCharacter.getExtendedName(element.integer) + 1403 " instead of " + (String)element.value); 1404 break; 1405 } 1406 old.integer++; 1407 } 1408 iterator = UCharacter.getName1_0Iterator(); 1409 old.integer = 0; 1410 while (iterator.next(element)) { 1411 logln(Integer.toHexString(element.integer) + " " + 1412 (String)element.value); 1413 if (element.integer != 0 && element.integer <= old.integer) { 1414 errln("FAIL next returned a less codepoint \\u" + 1415 Integer.toHexString(element.integer) + " than \\u" + 1416 Integer.toHexString(old.integer)); 1417 break; 1418 } 1419 if (!element.value.equals(UCharacter.getName1_0( 1420 element.integer))) { 1421 errln("FAIL next codepoint \\u" + 1422 Integer.toHexString(element.integer) + 1423 " name cannot be null"); 1424 break; 1425 } 1426 for (int i = old.integer + 1; i < element.integer; i ++) { 1427 if (UCharacter.getName1_0(i) != null) { 1428 errln("FAIL between codepoints are not null \\u" + 1429 Integer.toHexString(old.integer) + " and " + 1430 Integer.toHexString(element.integer) + " has " + 1431 Integer.toHexString(i) + " with a name " + 1432 UCharacter.getName1_0(i)); 1433 break; 1434 } 1435 } 1436 old.integer = element.integer; 1437 } 1438 } catch(Exception e){ 1439 // !!! wouldn't preflighting be simpler? This looks like 1440 // it is effectively be doing that. It seems that for every 1441 // true error the code will call errln, which will throw the error, which 1442 // this will catch, which this will then rethrow the error. Just seems 1443 // cumbersome. 1444 if(e.getMessage().indexOf("unames.icu") >= 0){ 1445 warnln("Could not find unames.icu"); 1446 } else { 1447 errln(e.getMessage()); 1448 } 1449 } 1450 } 1451 1452 /** 1453 * Testing the for illegal characters 1454 */ 1455 @Test TestIsLegal()1456 public void TestIsLegal() 1457 { 1458 int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE, 1459 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0, 1460 0x00FDEF, 0xD800, 0xDC00, -1}; 1461 int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000, 1462 0x0010FFFD, 0xFDCF, 0x00FDF0}; 1463 for (int count = 0; count < illegal.length; count ++) { 1464 if (UCharacter.isLegal(illegal[count])) { 1465 errln("FAIL \\u" + hex(illegal[count]) + 1466 " is not a legal character"); 1467 } 1468 } 1469 1470 for (int count = 0; count < legal.length; count ++) { 1471 if (!UCharacter.isLegal(legal[count])) { 1472 errln("FAIL \\u" + hex(legal[count]) + 1473 " is a legal character"); 1474 } 1475 } 1476 1477 String illegalStr = "This is an illegal string "; 1478 String legalStr = "This is a legal string "; 1479 1480 for (int count = 0; count < illegal.length; count ++) { 1481 StringBuffer str = new StringBuffer(illegalStr); 1482 if (illegal[count] < 0x10000) { 1483 str.append((char)illegal[count]); 1484 } 1485 else { 1486 char lead = UTF16.getLeadSurrogate(illegal[count]); 1487 char trail = UTF16.getTrailSurrogate(illegal[count]); 1488 str.append(lead); 1489 str.append(trail); 1490 } 1491 if (UCharacter.isLegal(str.toString())) { 1492 errln("FAIL " + hex(str.toString()) + 1493 " is not a legal string"); 1494 } 1495 } 1496 1497 for (int count = 0; count < legal.length; count ++) { 1498 StringBuffer str = new StringBuffer(legalStr); 1499 if (legal[count] < 0x10000) { 1500 str.append((char)legal[count]); 1501 } 1502 else { 1503 char lead = UTF16.getLeadSurrogate(legal[count]); 1504 char trail = UTF16.getTrailSurrogate(legal[count]); 1505 str.append(lead); 1506 str.append(trail); 1507 } 1508 if (!UCharacter.isLegal(str.toString())) { 1509 errln("FAIL " + hex(str.toString()) + " is a legal string"); 1510 } 1511 } 1512 } 1513 1514 /** 1515 * Test getCodePoint 1516 */ 1517 @Test TestCodePoint()1518 public void TestCodePoint() 1519 { 1520 int ch = 0x10000; 1521 for (char i = 0xD800; i < 0xDC00; i ++) { 1522 for (char j = 0xDC00; j <= 0xDFFF; j ++) { 1523 if (UCharacter.getCodePoint(i, j) != ch) { 1524 errln("Error getting codepoint for surrogate " + 1525 "characters \\u" 1526 + Integer.toHexString(i) + " \\u" + 1527 Integer.toHexString(j)); 1528 } 1529 ch ++; 1530 } 1531 } 1532 try 1533 { 1534 UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00); 1535 errln("Invalid surrogate characters should not form a " + 1536 "supplementary"); 1537 } catch(Exception e) { 1538 } 1539 for (char i = 0; i < 0xFFFF; i++) { 1540 if (i == 0xFFFE || 1541 (i >= 0xD800 && i <= 0xDFFF) || 1542 (i >= 0xFDD0 && i <= 0xFDEF)) { 1543 // not a character 1544 try { 1545 UCharacter.getCodePoint(i); 1546 errln("Not a character is not a valid codepoint"); 1547 } catch (Exception e) { 1548 } 1549 } 1550 else { 1551 if (UCharacter.getCodePoint(i) != i) { 1552 errln("A valid codepoint should return itself"); 1553 } 1554 } 1555 } 1556 } 1557 1558 /** 1559 * This method is a little different from the type test in icu4c. 1560 * But combined with testUnicodeData, they basically do the same thing. 1561 */ 1562 @Test TestIteration()1563 public void TestIteration() 1564 { 1565 int limit = 0; 1566 int prevtype = -1; 1567 int shouldBeDir; 1568 int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER}, 1569 {0x308, UCharacterCategory.NON_SPACING_MARK}, 1570 {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES}, 1571 {0xe0041, UCharacterCategory.FORMAT}, 1572 {0xeffff, UCharacterCategory.UNASSIGNED}}; 1573 1574 // default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header 1575 int defaultBidi[][]={ 1576 { 0x0590, UCharacterDirection.LEFT_TO_RIGHT }, 1577 { 0x0600, UCharacterDirection.RIGHT_TO_LEFT }, 1578 { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1579 { 0x0860, UCharacterDirection.RIGHT_TO_LEFT }, 1580 { 0x0870, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, // Unicode 10 changes U+0860..U+086F from R to AL. 1581 { 0x08A0, UCharacterDirection.RIGHT_TO_LEFT }, 1582 { 0x0900, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */ 1583 { 0x20A0, UCharacterDirection.LEFT_TO_RIGHT }, 1584 { 0x20D0, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */ 1585 { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT }, 1586 { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT }, 1587 { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1588 { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT }, 1589 { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1590 1591 { 0x10800, UCharacterDirection.LEFT_TO_RIGHT }, 1592 { 0x10D00, UCharacterDirection.RIGHT_TO_LEFT }, // Unicode 11 changes U+10D00..U+10D3F from R to AL. 1593 { 0x10D40, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1594 { 0x10F30, UCharacterDirection.RIGHT_TO_LEFT }, // Unicode 11 changes U+10F30..U+10F6F from R to AL. 1595 { 0x10F70, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1596 { 0x11000, UCharacterDirection.RIGHT_TO_LEFT }, 1597 1598 { 0x1E800, UCharacterDirection.LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */ 1599 { 0x1EC70, UCharacterDirection.RIGHT_TO_LEFT }, // Unicode 11 changes U+1EC70..U+1ECBF from R to AL. 1600 { 0x1ECC0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1601 { 0x1ED00, UCharacterDirection.RIGHT_TO_LEFT }, // Unicode 12 changes U+1ED00..U+1ED4F from R to AL. 1602 { 0x1ED50, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1603 { 0x1EE00, UCharacterDirection.RIGHT_TO_LEFT }, 1604 { 0x1EF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */ 1605 { 0x1F000, UCharacterDirection.RIGHT_TO_LEFT }, 1606 { 0x110000, UCharacterDirection.LEFT_TO_RIGHT } 1607 }; 1608 1609 RangeValueIterator iterator = UCharacter.getTypeIterator(); 1610 RangeValueIterator.Element result = new RangeValueIterator.Element(); 1611 while (iterator.next(result)) { 1612 if (result.start != limit) { 1613 errln("UCharacterIteration failed: Ranges not continuous " + 1614 "0x" + Integer.toHexString(result.start)); 1615 } 1616 1617 limit = result.limit; 1618 if (result.value == prevtype) { 1619 errln("Type of the next set of enumeration should be different"); 1620 } 1621 prevtype = result.value; 1622 1623 for (int i = result.start; i < limit; i ++) { 1624 int temptype = UCharacter.getType(i); 1625 if (temptype != result.value) { 1626 errln("UCharacterIteration failed: Codepoint \\u" + 1627 Integer.toHexString(i) + " should be of type " + 1628 temptype + " not " + result.value); 1629 } 1630 } 1631 1632 for (int i = 0; i < test.length; ++ i) { 1633 if (result.start <= test[i][0] && test[i][0] < result.limit) { 1634 if (result.value != test[i][1]) { 1635 errln("error: getTypes() has range [" 1636 + Integer.toHexString(result.start) + ", " 1637 + Integer.toHexString(result.limit) 1638 + "] with type " + result.value 1639 + " instead of [" 1640 + Integer.toHexString(test[i][0]) + ", " 1641 + Integer.toHexString(test[i][1])); 1642 } 1643 } 1644 } 1645 1646 // LineBreak.txt specifies: 1647 // # - Assigned characters that are not listed explicitly are given the value 1648 // # "AL". 1649 // # - Unassigned characters are given the value "XX". 1650 // 1651 // PUA characters are listed explicitly with "XX". 1652 // Verify that no assigned character has "XX". 1653 if (result.value != UCharacterCategory.UNASSIGNED 1654 && result.value != UCharacterCategory.PRIVATE_USE) { 1655 int c = result.start; 1656 while (c < result.limit) { 1657 if (0 == UCharacter.getIntPropertyValue(c, 1658 UProperty.LINE_BREAK)) { 1659 logln("error UProperty.LINE_BREAK(assigned \\u" 1660 + Utility.hex(c, 4) + ")=XX"); 1661 } 1662 ++ c; 1663 } 1664 } 1665 1666 /* 1667 * Verify default Bidi classes. 1668 * See DerivedBidiClass.txt, especially for unassigned code points. 1669 */ 1670 if (result.value == UCharacterCategory.UNASSIGNED 1671 || result.value == UCharacterCategory.PRIVATE_USE) { 1672 int c = result.start; 1673 for (int i = 0; i < defaultBidi.length && c < result.limit; 1674 ++ i) { 1675 if (c < defaultBidi[i][0]) { 1676 while (c < result.limit && c < defaultBidi[i][0]) { 1677 // TODO change to public UCharacter.isNonCharacter(c) once it's available 1678 if(ohos.global.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) { 1679 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL; 1680 } else { 1681 shouldBeDir=defaultBidi[i][1]; 1682 } 1683 1684 if (UCharacter.getDirection(c) != shouldBeDir 1685 || UCharacter.getIntPropertyValue(c, 1686 UProperty.BIDI_CLASS) 1687 != shouldBeDir) { 1688 errln("error: getDirection(unassigned/PUA " 1689 + Integer.toHexString(c) 1690 + ") should be " 1691 + shouldBeDir); 1692 } 1693 ++ c; 1694 } 1695 } 1696 } 1697 } 1698 } 1699 1700 iterator.reset(); 1701 if (iterator.next(result) == false || result.start != 0) { 1702 System.out.println("result " + result.start); 1703 errln("UCharacterIteration reset() failed"); 1704 } 1705 } 1706 1707 /** 1708 * Testing getAge 1709 */ 1710 @Test TestGetAge()1711 public void TestGetAge() 1712 { 1713 int ages[] = {0x41, 1, 1, 0, 0, 1714 0xffff, 1, 1, 0, 0, 1715 0x20ab, 2, 0, 0, 0, 1716 0x2fffe, 2, 0, 0, 0, 1717 0x20ac, 2, 1, 0, 0, 1718 0xfb1d, 3, 0, 0, 0, 1719 0x3f4, 3, 1, 0, 0, 1720 0x10300, 3, 1, 0, 0, 1721 0x220, 3, 2, 0, 0, 1722 0xff60, 3, 2, 0, 0}; 1723 for (int i = 0; i < ages.length; i += 5) { 1724 VersionInfo age = UCharacter.getAge(ages[i]); 1725 if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2], 1726 ages[i + 3], ages[i + 4])) { 1727 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) + 1728 ") == " + age.toString() + " instead of " + 1729 ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] + 1730 "." + ages[i + 4]); 1731 } 1732 } 1733 1734 int[] valid_tests = { 1735 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 1736 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 1737 int[] invalid_tests = { 1738 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 1739 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 1740 1741 for(int i=0; i< valid_tests.length; i++){ 1742 try{ 1743 UCharacter.getAge(valid_tests[i]); 1744 } catch(Exception e){ 1745 errln("UCharacter.getAge(int) was not suppose to have " + 1746 "an exception. Value passed: " + valid_tests[i]); 1747 } 1748 } 1749 1750 for(int i=0; i< invalid_tests.length; i++){ 1751 try{ 1752 UCharacter.getAge(invalid_tests[i]); 1753 errln("UCharacter.getAge(int) was suppose to have " + 1754 "an exception. Value passed: " + invalid_tests[i]); 1755 } catch(Exception e){ 1756 } 1757 } 1758 } 1759 1760 /** 1761 * Test binary non core properties 1762 */ 1763 @Test TestAdditionalProperties()1764 public void TestAdditionalProperties() 1765 { 1766 final int FALSE = 0; 1767 final int TRUE = 1; 1768 // test data for hasBinaryProperty() 1769 int props[][] = { // code point, property 1770 { 0x0627, UProperty.ALPHABETIC, 1 }, 1771 { 0x1034a, UProperty.ALPHABETIC, 1 }, 1772 { 0x2028, UProperty.ALPHABETIC, 0 }, 1773 1774 { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 }, 1775 { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 }, 1776 1777 { 0x202c, UProperty.BIDI_CONTROL, 1 }, 1778 { 0x202f, UProperty.BIDI_CONTROL, 0 }, 1779 1780 { 0x003c, UProperty.BIDI_MIRRORED, 1 }, 1781 { 0x003d, UProperty.BIDI_MIRRORED, 0 }, 1782 1783 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 1784 { 0x2018, UProperty.BIDI_MIRRORED, 0 }, 1785 { 0x201d, UProperty.BIDI_MIRRORED, 0 }, 1786 { 0x201f, UProperty.BIDI_MIRRORED, 0 }, 1787 { 0x301e, UProperty.BIDI_MIRRORED, 0 }, 1788 1789 { 0x058a, UProperty.DASH, 1 }, 1790 { 0x007e, UProperty.DASH, 0 }, 1791 1792 { 0x0c4d, UProperty.DIACRITIC, 1 }, 1793 { 0x3000, UProperty.DIACRITIC, 0 }, 1794 1795 { 0x0e46, UProperty.EXTENDER, 1 }, 1796 { 0x0020, UProperty.EXTENDER, 0 }, 1797 1798 { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1799 { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1800 { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 }, 1801 1802 { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */ 1803 { 0x0308, UProperty.NFD_INERT, 0 }, 1804 1805 { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */ 1806 { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */ 1807 1808 { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */ 1809 { 0x0061, UProperty.NFC_INERT, 0 }, /* a */ 1810 { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */ 1811 { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */ 1812 { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */ 1813 { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */ 1814 1815 { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */ 1816 { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */ 1817 1818 { 0x00e4, UProperty.SEGMENT_STARTER, 1 }, 1819 { 0x0308, UProperty.SEGMENT_STARTER, 0 }, 1820 { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */ 1821 { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */ 1822 { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */ 1823 { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */ 1824 1825 { 0x0044, UProperty.HEX_DIGIT, 1 }, 1826 { 0xff46, UProperty.HEX_DIGIT, 1 }, 1827 { 0x0047, UProperty.HEX_DIGIT, 0 }, 1828 1829 { 0x30fb, UProperty.HYPHEN, 1 }, 1830 { 0xfe58, UProperty.HYPHEN, 0 }, 1831 1832 { 0x2172, UProperty.ID_CONTINUE, 1 }, 1833 { 0x0307, UProperty.ID_CONTINUE, 1 }, 1834 { 0x005c, UProperty.ID_CONTINUE, 0 }, 1835 1836 { 0x2172, UProperty.ID_START, 1 }, 1837 { 0x007a, UProperty.ID_START, 1 }, 1838 { 0x0039, UProperty.ID_START, 0 }, 1839 1840 { 0x4db5, UProperty.IDEOGRAPHIC, 1 }, 1841 { 0x2f999, UProperty.IDEOGRAPHIC, 1 }, 1842 { 0x2f99, UProperty.IDEOGRAPHIC, 0 }, 1843 1844 { 0x200c, UProperty.JOIN_CONTROL, 1 }, 1845 { 0x2029, UProperty.JOIN_CONTROL, 0 }, 1846 1847 { 0x1d7bc, UProperty.LOWERCASE, 1 }, 1848 { 0x0345, UProperty.LOWERCASE, 1 }, 1849 { 0x0030, UProperty.LOWERCASE, 0 }, 1850 1851 { 0x1d7a9, UProperty.MATH, 1 }, 1852 { 0x2135, UProperty.MATH, 1 }, 1853 { 0x0062, UProperty.MATH, 0 }, 1854 1855 { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1856 { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1857 { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 }, 1858 1859 { 0x0022, UProperty.QUOTATION_MARK, 1 }, 1860 { 0xff62, UProperty.QUOTATION_MARK, 1 }, 1861 { 0xd840, UProperty.QUOTATION_MARK, 0 }, 1862 1863 { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 }, 1864 { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 }, 1865 1866 { 0x1d44a, UProperty.UPPERCASE, 1 }, 1867 { 0x2162, UProperty.UPPERCASE, 1 }, 1868 { 0x0345, UProperty.UPPERCASE, 0 }, 1869 1870 { 0x0020, UProperty.WHITE_SPACE, 1 }, 1871 { 0x202f, UProperty.WHITE_SPACE, 1 }, 1872 { 0x3001, UProperty.WHITE_SPACE, 0 }, 1873 1874 { 0x0711, UProperty.XID_CONTINUE, 1 }, 1875 { 0x1d1aa, UProperty.XID_CONTINUE, 1 }, 1876 { 0x007c, UProperty.XID_CONTINUE, 0 }, 1877 1878 { 0x16ee, UProperty.XID_START, 1 }, 1879 { 0x23456, UProperty.XID_START, 1 }, 1880 { 0x1d1aa, UProperty.XID_START, 0 }, 1881 1882 /* 1883 * Version break: 1884 * The following properties are only supported starting with the 1885 * Unicode version indicated in the second field. 1886 */ 1887 { -1, 0x320, 0 }, 1888 1889 { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1890 { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1891 { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 }, 1892 1893 { 0x0149, UProperty.DEPRECATED, 1 }, /* changed in Unicode 5.2 */ 1894 { 0x0341, UProperty.DEPRECATED, 0 }, /* changed in Unicode 5.2 */ 1895 { 0xe0001, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */ 1896 { 0xe0100, UProperty.DEPRECATED, 0 }, 1897 1898 { 0x00a0, UProperty.GRAPHEME_BASE, 1 }, 1899 { 0x0a4d, UProperty.GRAPHEME_BASE, 0 }, 1900 { 0xff9d, UProperty.GRAPHEME_BASE, 1 }, 1901 { 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1902 1903 { 0x0300, UProperty.GRAPHEME_EXTEND, 1 }, 1904 { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 }, 1905 { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1906 { 0x0603, UProperty.GRAPHEME_EXTEND, 0 }, 1907 1908 { 0x0a4d, UProperty.GRAPHEME_LINK, 1 }, 1909 { 0xff9f, UProperty.GRAPHEME_LINK, 0 }, 1910 1911 { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 }, 1912 { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 }, 1913 1914 { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 }, 1915 { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 }, 1916 1917 { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 }, 1918 { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 }, 1919 1920 { 0x2e9b, UProperty.RADICAL, 1 }, 1921 { 0x4e00, UProperty.RADICAL, 0 }, 1922 1923 { 0x012f, UProperty.SOFT_DOTTED, 1 }, 1924 { 0x0049, UProperty.SOFT_DOTTED, 0 }, 1925 1926 { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 }, 1927 { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 }, 1928 1929 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ 1930 1931 { 0x002e, UProperty.S_TERM, 1 }, 1932 { 0x0061, UProperty.S_TERM, 0 }, 1933 1934 { 0x180c, UProperty.VARIATION_SELECTOR, 1 }, 1935 { 0xfe03, UProperty.VARIATION_SELECTOR, 1 }, 1936 { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 }, 1937 { 0xe0200, UProperty.VARIATION_SELECTOR, 0 }, 1938 1939 /* enum/integer type properties */ 1940 /* test default Bidi classes for unassigned code points */ 1941 { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1942 { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1943 { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1944 { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ 1945 { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */ 1946 { 0x089f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1947 { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1948 { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1949 { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1950 { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1951 { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1952 1953 { 0x061d, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1954 { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1955 { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1956 { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1957 { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1958 { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1959 { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1960 1961 { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() }, 1962 { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()}, 1963 { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() }, 1964 { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() }, 1965 { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() }, 1966 { 0x0870, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()}, 1967 { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()}, 1968 { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()}, 1969 { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1970 { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1971 { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() }, 1972 1973 /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */ 1974 { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 }, 1975 1976 { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK }, 1977 { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT }, 1978 { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1979 { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1980 { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1981 { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1982 { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL }, 1983 { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT }, 1984 { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1985 1986 { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1987 { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW }, 1988 { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1989 { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH }, 1990 { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1991 { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH }, 1992 { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1993 { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1994 { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1995 { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1996 { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1997 { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1998 { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1999 { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 2000 { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 2001 { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 2002 { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 2003 2004 /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ 2005 { 0xd7c7, UProperty.GENERAL_CATEGORY, 0 }, 2006 { 0xd7d7, UProperty.GENERAL_CATEGORY, UCharacterEnums.ECharacterCategory.OTHER_LETTER }, /* changed in Unicode 5.2 */ 2007 2008 { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2009 { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN }, 2010 { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH }, 2011 { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH }, 2012 { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL }, 2013 2014 { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING }, 2015 { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 2016 { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING }, 2017 { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 2018 { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING }, 2019 { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 2020 { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 2021 { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 2022 2023 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */ 2024 { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 2025 { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 2026 { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION }, 2027 { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION }, 2028 { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 2029 { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 2030 { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 2031 { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 2032 { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 2033 { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 2034 { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 2035 { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION }, 2036 { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS }, 2037 { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 2038 { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 2039 2040 /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */ 2041 2042 /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */ 2043 2044 { 0x10ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2045 { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2046 { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2047 { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2048 { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2049 { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2050 { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2051 2052 { 0xa95f, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2053 { 0xa960, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2054 { 0xa97c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2055 { 0xa97d, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2056 2057 { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2058 { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2059 { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2060 { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2061 { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2062 { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2063 2064 { 0xd7af, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2065 { 0xd7b0, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2066 { 0xd7c6, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2067 { 0xd7c7, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2068 2069 { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2070 { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2071 { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2072 { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2073 { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2074 { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2075 { 0x1200, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2076 2077 { 0xd7ca, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2078 { 0xd7cb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2079 { 0xd7fb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2080 { 0xd7fc, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2081 2082 { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2083 { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2084 { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2085 { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2086 2087 { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2088 { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2089 { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2090 { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2091 { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2092 2093 { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2094 2095 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ 2096 2097 { 0x00d7, UProperty.PATTERN_SYNTAX, 1 }, 2098 { 0xfe45, UProperty.PATTERN_SYNTAX, 1 }, 2099 { 0x0061, UProperty.PATTERN_SYNTAX, 0 }, 2100 2101 { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 }, 2102 { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 }, 2103 { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 }, 2104 { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 }, 2105 { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 }, 2106 2107 { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID }, 2108 { 0x2c8e, UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID }, 2109 { 0xfe17, UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID }, 2110 2111 { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE }, 2112 { 0x2cea, UProperty.SCRIPT, UScript.COPTIC }, 2113 { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI }, 2114 { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN }, 2115 2116 { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 }, 2117 { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2118 { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2119 { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL }, 2120 { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT }, 2121 { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV }, 2122 2123 { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT }, 2124 { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND }, 2125 { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL }, 2126 { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V }, 2127 2128 { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER }, 2129 { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER }, 2130 { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC }, 2131 { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM }, 2132 2133 { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER }, 2134 { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER }, 2135 { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE }, 2136 { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP }, 2137 2138 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ 2139 2140 /* unassigned code points in new default Bidi R blocks */ 2141 { 0x1ede4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2142 { 0x1efe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2143 2144 /* test some script codes >127 */ 2145 { 0xa6e6, UProperty.SCRIPT, UScript.BAMUM }, 2146 { 0xa4d0, UProperty.SCRIPT, UScript.LISU }, 2147 { 0x10a7f, UProperty.SCRIPT, UScript.OLD_SOUTH_ARABIAN }, 2148 2149 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ 2150 2151 /* value changed in Unicode 6.0 */ 2152 { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.TEH_MARBUTA_GOAL }, 2153 2154 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ 2155 2156 /* unassigned code points in new/changed default Bidi AL blocks */ 2157 { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2158 { 0x1eee4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2159 2160 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ 2161 2162 /* unassigned code points in the currency symbols block now default to ET */ 2163 { 0x20C0, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2164 { 0x20CF, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2165 2166 /* new property in Unicode 6.3 */ 2167 { 0x0027, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2168 { 0x0028, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2169 { 0x0029, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2170 { 0xFF5C, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2171 { 0xFF5B, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2172 { 0xFF5D, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2173 2174 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ 2175 2176 /* new character range with Joining_Group values */ 2177 { 0x10ABF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2178 { 0x10AC0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_ALEPH }, 2179 { 0x10AC1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_BETH }, 2180 { 0x10AEF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_HUNDRED }, 2181 { 0x10AF0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2182 2183 { -1, 0xa00, 0 }, // version break for Unicode 10 2184 2185 { 0x1F1E5, UProperty.REGIONAL_INDICATOR, FALSE }, 2186 { 0x1F1E7, UProperty.REGIONAL_INDICATOR, TRUE }, 2187 { 0x1F1FF, UProperty.REGIONAL_INDICATOR, TRUE }, 2188 { 0x1F200, UProperty.REGIONAL_INDICATOR, FALSE }, 2189 2190 { 0x0600, UProperty.PREPENDED_CONCATENATION_MARK, TRUE }, 2191 { 0x0606, UProperty.PREPENDED_CONCATENATION_MARK, FALSE }, 2192 { 0x110BD, UProperty.PREPENDED_CONCATENATION_MARK, TRUE }, 2193 2194 /* undefined UProperty values */ 2195 { 0x61, 0x4a7, 0 }, 2196 { 0x234bc, 0x15ed, 0 } 2197 }; 2198 2199 2200 if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0 2201 || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0 2202 || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0 /* j2478 */ 2203 || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */ 2204 || UCharacter.getIntPropertyMinValue(0x2345) != 0) { 2205 errln("error: UCharacter.getIntPropertyMinValue() wrong"); 2206 } 2207 2208 if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) { 2209 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n"); 2210 } 2211 if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) { 2212 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n"); 2213 } 2214 if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) { 2215 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n"); 2216 } 2217 2218 if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) { 2219 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n"); 2220 } 2221 if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) { 2222 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n"); 2223 } 2224 if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) { 2225 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n"); 2226 } 2227 if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) { 2228 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n"); 2229 } 2230 if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) { 2231 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n"); 2232 } 2233 if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) { 2234 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n"); 2235 } 2236 if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) { 2237 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n"); 2238 } 2239 if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) { 2240 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n"); 2241 } 2242 if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) { 2243 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n"); 2244 } 2245 if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) { 2246 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n"); 2247 } 2248 if(UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)!=UCharacter.BidiPairedBracketType.COUNT-1) { 2249 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE) wrong\n"); 2250 } 2251 /*JB#2410*/ 2252 if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) { 2253 errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n"); 2254 } 2255 if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) { 2256 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n"); 2257 } 2258 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT -1)) { 2259 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n"); 2260 } 2261 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT -1)) { 2262 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n"); 2263 } 2264 if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT -1)) { 2265 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n"); 2266 } 2267 2268 VersionInfo version = UCharacter.getUnicodeVersion(); 2269 2270 // test hasBinaryProperty() 2271 for (int i = 0; i < props.length; ++ i) { 2272 int which = props[i][1]; 2273 if (props[i][0] < 0) { 2274 if (version.compareTo(VersionInfo.getInstance(which >> 8, 2275 (which >> 4) & 0xF, 2276 which & 0xF, 2277 0)) < 0) { 2278 break; 2279 } 2280 continue; 2281 } 2282 String whichName; 2283 try { 2284 whichName = UCharacter.getPropertyName(which, UProperty.NameChoice.LONG); 2285 } catch(IllegalArgumentException e) { 2286 // There are intentionally invalid property integer values ("which"). 2287 // Catch and ignore the exception from getPropertyName(). 2288 whichName = "undefined UProperty value"; 2289 } 2290 boolean expect = true; 2291 if (props[i][2] == 0) { 2292 expect = false; 2293 } 2294 if (which < UProperty.INT_START) { 2295 if (UCharacter.hasBinaryProperty(props[i][0], which) 2296 != expect) { 2297 errln("error: UCharacter.hasBinaryProperty(U+" + 2298 Utility.hex(props[i][0], 4) + ", " + 2299 whichName + ") has an error, expected=" + expect); 2300 } 2301 } 2302 2303 int retVal = UCharacter.getIntPropertyValue(props[i][0], which); 2304 if (retVal != props[i][2]) { 2305 errln("error: UCharacter.getIntPropertyValue(U+" + 2306 Utility.hex(props[i][0], 4) + 2307 ", " + whichName + ") is wrong, expected=" 2308 + props[i][2] + " actual=" + retVal); 2309 } 2310 2311 // test separate functions, too 2312 switch (which) { 2313 case UProperty.ALPHABETIC: 2314 if (UCharacter.isUAlphabetic(props[i][0]) != expect) { 2315 errln("error: UCharacter.isUAlphabetic(\\u" + 2316 Integer.toHexString(props[i][0]) + 2317 ") is wrong expected " + props[i][2]); 2318 } 2319 break; 2320 case UProperty.LOWERCASE: 2321 if (UCharacter.isULowercase(props[i][0]) != expect) { 2322 errln("error: UCharacter.isULowercase(\\u" + 2323 Integer.toHexString(props[i][0]) + 2324 ") is wrong expected " +props[i][2]); 2325 } 2326 break; 2327 case UProperty.UPPERCASE: 2328 if (UCharacter.isUUppercase(props[i][0]) != expect) { 2329 errln("error: UCharacter.isUUppercase(\\u" + 2330 Integer.toHexString(props[i][0]) + 2331 ") is wrong expected " + props[i][2]); 2332 } 2333 break; 2334 case UProperty.WHITE_SPACE: 2335 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) { 2336 errln("error: UCharacter.isUWhiteSpace(\\u" + 2337 Integer.toHexString(props[i][0]) + 2338 ") is wrong expected " + props[i][2]); 2339 } 2340 break; 2341 default: 2342 break; 2343 } 2344 } 2345 } 2346 2347 @Test TestNumericProperties()2348 public void TestNumericProperties() 2349 { 2350 // see UnicodeData.txt, DerivedNumericValues.txt 2351 double values[][] = { 2352 // Code point, numeric type, numeric value. 2353 // If a fourth value is specified, it is the getNumericValue(). 2354 // Otherwise it is expected to be the same as the getUnicodeNumericValue(), 2355 // where UCharacter.NO_NUMERIC_VALUE is turned into -1. 2356 // getNumericValue() returns -2 if the code point has a value 2357 // which is not a non-negative integer. (This is mostly auto-converted to -2.) 2358 { 0x0F33, UCharacter.NumericType.NUMERIC, -1./2. }, 2359 { 0x0C66, UCharacter.NumericType.DECIMAL, 0 }, 2360 { 0x96f6, UCharacter.NumericType.NUMERIC, 0 }, 2361 { 0xa833, UCharacter.NumericType.NUMERIC, 1./16. }, 2362 { 0x2152, UCharacter.NumericType.NUMERIC, 1./10. }, 2363 { 0x2151, UCharacter.NumericType.NUMERIC, 1./9. }, 2364 { 0x1245f, UCharacter.NumericType.NUMERIC, 1./8. }, 2365 { 0x2150, UCharacter.NumericType.NUMERIC, 1./7. }, 2366 { 0x2159, UCharacter.NumericType.NUMERIC, 1./6. }, 2367 { 0x09f6, UCharacter.NumericType.NUMERIC, 3./16. }, 2368 { 0x2155, UCharacter.NumericType.NUMERIC, 1./5. }, 2369 { 0x00BD, UCharacter.NumericType.NUMERIC, 1./2. }, 2370 { 0x0031, UCharacter.NumericType.DECIMAL, 1. }, 2371 { 0x4e00, UCharacter.NumericType.NUMERIC, 1. }, 2372 { 0x58f1, UCharacter.NumericType.NUMERIC, 1. }, 2373 { 0x10320, UCharacter.NumericType.NUMERIC, 1. }, 2374 { 0x0F2B, UCharacter.NumericType.NUMERIC, 3./2. }, 2375 { 0x00B2, UCharacter.NumericType.DIGIT, 2. }, /* Unicode 4.0 change */ 2376 { 0x5f10, UCharacter.NumericType.NUMERIC, 2. }, 2377 { 0x1813, UCharacter.NumericType.DECIMAL, 3. }, 2378 { 0x5f0e, UCharacter.NumericType.NUMERIC, 3. }, 2379 { 0x2173, UCharacter.NumericType.NUMERIC, 4. }, 2380 { 0x8086, UCharacter.NumericType.NUMERIC, 4. }, 2381 { 0x278E, UCharacter.NumericType.DIGIT, 5. }, 2382 { 0x1D7F2, UCharacter.NumericType.DECIMAL, 6. }, 2383 { 0x247A, UCharacter.NumericType.DIGIT, 7. }, 2384 { 0x7396, UCharacter.NumericType.NUMERIC, 9. }, 2385 { 0x1372, UCharacter.NumericType.NUMERIC, 10. }, 2386 { 0x216B, UCharacter.NumericType.NUMERIC, 12. }, 2387 { 0x16EE, UCharacter.NumericType.NUMERIC, 17. }, 2388 { 0x249A, UCharacter.NumericType.NUMERIC, 19. }, 2389 { 0x303A, UCharacter.NumericType.NUMERIC, 30. }, 2390 { 0x5345, UCharacter.NumericType.NUMERIC, 30. }, 2391 { 0x32B2, UCharacter.NumericType.NUMERIC, 37. }, 2392 { 0x1375, UCharacter.NumericType.NUMERIC, 40. }, 2393 { 0x10323, UCharacter.NumericType.NUMERIC, 50. }, 2394 { 0x0BF1, UCharacter.NumericType.NUMERIC, 100. }, 2395 { 0x964c, UCharacter.NumericType.NUMERIC, 100. }, 2396 { 0x217E, UCharacter.NumericType.NUMERIC, 500. }, 2397 { 0x2180, UCharacter.NumericType.NUMERIC, 1000. }, 2398 { 0x4edf, UCharacter.NumericType.NUMERIC, 1000. }, 2399 { 0x2181, UCharacter.NumericType.NUMERIC, 5000. }, 2400 { 0x137C, UCharacter.NumericType.NUMERIC, 10000. }, 2401 { 0x4e07, UCharacter.NumericType.NUMERIC, 10000. }, 2402 { 0x12432, UCharacter.NumericType.NUMERIC, 216000. }, 2403 { 0x12433, UCharacter.NumericType.NUMERIC, 432000. }, 2404 { 0x4ebf, UCharacter.NumericType.NUMERIC, 100000000. }, 2405 { 0x5146, UCharacter.NumericType.NUMERIC, 1000000000000. }, 2406 { -1, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2407 { 0x61, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE, 10. }, 2408 { 0x3000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2409 { 0xfffe, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2410 { 0x10301, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2411 { 0xe0033, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2412 { 0x10ffff, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2413 { 0x110000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE } 2414 }; 2415 2416 for (int i = 0; i < values.length; ++ i) { 2417 int c = (int)values[i][0]; 2418 int type = UCharacter.getIntPropertyValue(c, 2419 UProperty.NUMERIC_TYPE); 2420 double nv = UCharacter.getUnicodeNumericValue(c); 2421 2422 if (type != values[i][1]) { 2423 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4) 2424 + ") = " + type + " should be " + (int)values[i][1]); 2425 } 2426 if (0.000001 <= Math.abs(nv - values[i][2])) { 2427 errln("UCharacter.getUnicodeNumericValue(\\u" + Utility.hex(c, 4) 2428 + ") = " + nv + " should be " + values[i][2]); 2429 } 2430 2431 // Test getNumericValue() as well. 2432 // It can only return the subset of numeric values that are 2433 // non-negative and fit into an int. 2434 int expectedInt; 2435 if (values[i].length == 3) { 2436 if (values[i][2] == UCharacter.NO_NUMERIC_VALUE) { 2437 expectedInt = -1; 2438 } else { 2439 expectedInt = (int)values[i][2]; 2440 if (expectedInt < 0 || expectedInt != values[i][2]) { 2441 // The numeric value is not a non-negative integer. 2442 expectedInt = -2; 2443 } 2444 } 2445 } else { 2446 expectedInt = (int)values[i][3]; 2447 } 2448 int nvInt = UCharacter.getNumericValue(c); 2449 if (nvInt != expectedInt) { 2450 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4) 2451 + ") = " + nvInt + " should be " + expectedInt); 2452 } 2453 } 2454 } 2455 2456 /** 2457 * Test the property values API. See JB#2410. 2458 */ 2459 @Test TestPropertyValues()2460 public void TestPropertyValues() { 2461 int i, p, min, max; 2462 2463 /* Min should be 0 for everything. */ 2464 /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */ 2465 for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) { 2466 min = UCharacter.getIntPropertyMinValue(p); 2467 if (min != 0) { 2468 if (p == UProperty.BLOCK) { 2469 /* This is okay...for now. See JB#2487. 2470 TODO Update this for JB#2487. */ 2471 } else { 2472 String name; 2473 name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG); 2474 errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " + 2475 min + ", exp. 0"); 2476 } 2477 } 2478 } 2479 2480 if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK) 2481 != 0 2482 || UCharacter.getIntPropertyMaxValue( 2483 UProperty.GENERAL_CATEGORY_MASK) 2484 != -1) { 2485 errln("error: UCharacter.getIntPropertyMin/MaxValue(" 2486 + "UProperty.GENERAL_CATEGORY_MASK) is wrong"); 2487 } 2488 2489 /* Max should be -1 for invalid properties. */ 2490 max = UCharacter.getIntPropertyMaxValue(-1); 2491 if (max != -1) { 2492 errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " + 2493 max + ", exp. -1"); 2494 } 2495 2496 /* Script should return 0 for an invalid code point. If the API 2497 throws an exception then that's fine too. */ 2498 for (i=0; i<2; ++i) { 2499 try { 2500 int script = 0; 2501 String desc = null; 2502 switch (i) { 2503 case 0: 2504 script = UScript.getScript(-1); 2505 desc = "UScript.getScript(-1)"; 2506 break; 2507 case 1: 2508 script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT); 2509 desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)"; 2510 break; 2511 } 2512 if (script != 0) { 2513 errln("FAIL: " + desc + " = " + script + ", exp. 0"); 2514 } 2515 } catch (IllegalArgumentException e) {} 2516 } 2517 } 2518 2519 @Test TestBidiPairedBracketType()2520 public void TestBidiPairedBracketType() { 2521 // BidiBrackets-6.3.0.txt says: 2522 // 2523 // The set of code points listed in this file was originally derived 2524 // using the character properties General_Category (gc), Bidi_Class (bc), 2525 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: 2526 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, 2527 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket 2528 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type 2529 // property values are Open and Close, respectively. 2530 UnicodeSet bpt = new UnicodeSet("[:^bpt=n:]"); 2531 assertTrue("bpt!=None is not empty", !bpt.isEmpty()); 2532 // The following should always be true. 2533 UnicodeSet mirrored = new UnicodeSet("[:Bidi_M:]"); 2534 UnicodeSet other_neutral = new UnicodeSet("[:bc=ON:]"); 2535 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); 2536 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt)); 2537 // The following are true at least initially in Unicode 6.3. 2538 UnicodeSet bpt_open = new UnicodeSet("[:bpt=o:]"); 2539 UnicodeSet bpt_close = new UnicodeSet("[:bpt=c:]"); 2540 UnicodeSet ps = new UnicodeSet("[:Ps:]"); 2541 UnicodeSet pe = new UnicodeSet("[:Pe:]"); 2542 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); 2543 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); 2544 } 2545 2546 @Test TestEmojiProperties()2547 public void TestEmojiProperties() { 2548 assertFalse("space is not Emoji", UCharacter.hasBinaryProperty(0x20, UProperty.EMOJI)); 2549 assertTrue("shooting star is Emoji", UCharacter.hasBinaryProperty(0x1F320, UProperty.EMOJI)); 2550 UnicodeSet emoji = new UnicodeSet("[:Emoji:]"); 2551 assertTrue("lots of Emoji", emoji.size() > 700); 2552 2553 assertTrue("shooting star is Emoji_Presentation", 2554 UCharacter.hasBinaryProperty(0x1F320, UProperty.EMOJI_PRESENTATION)); 2555 assertTrue("Fitzpatrick 6 is Emoji_Modifier", 2556 UCharacter.hasBinaryProperty(0x1F3FF, UProperty.EMOJI_MODIFIER)); 2557 assertTrue("happy person is Emoji_Modifier_Base", 2558 UCharacter.hasBinaryProperty(0x1F64B, UProperty.EMOJI_MODIFIER_BASE)); 2559 assertTrue("asterisk is Emoji_Component", 2560 UCharacter.hasBinaryProperty(0x2A, UProperty.EMOJI_COMPONENT)); 2561 assertTrue("copyright is Extended_Pictographic", 2562 UCharacter.hasBinaryProperty(0xA9, UProperty.EXTENDED_PICTOGRAPHIC)); 2563 } 2564 2565 @Test TestIndicPositionalCategory()2566 public void TestIndicPositionalCategory() { 2567 UnicodeSet na = new UnicodeSet("[:InPC=NA:]"); 2568 assertTrue("mostly NA", 1000000 <= na.size() && na.size() <= Character.MAX_CODE_POINT - 500); 2569 UnicodeSet vol = new UnicodeSet("[:InPC=Visual_Order_Left:]"); 2570 assertTrue("some Visual_Order_Left", 19 <= vol.size() && vol.size() <= 100); 2571 assertEquals("U+08FF: NA", UCharacter.IndicPositionalCategory.NA, 2572 UCharacter.getIntPropertyValue(0x08FF, UProperty.INDIC_POSITIONAL_CATEGORY)); 2573 assertEquals("U+0900: Top", UCharacter.IndicPositionalCategory.TOP, 2574 UCharacter.getIntPropertyValue(0x0900, UProperty.INDIC_POSITIONAL_CATEGORY)); 2575 assertEquals("U+10A06: Overstruck", UCharacter.IndicPositionalCategory.OVERSTRUCK, 2576 UCharacter.getIntPropertyValue(0x10A06, UProperty.INDIC_POSITIONAL_CATEGORY)); 2577 } 2578 2579 @Test TestIndicSyllabicCategory()2580 public void TestIndicSyllabicCategory() { 2581 UnicodeSet other = new UnicodeSet("[:InSC=Other:]"); 2582 assertTrue("mostly Other", 1000000 <= other.size() && other.size() <= Character.MAX_CODE_POINT - 500); 2583 UnicodeSet ava = new UnicodeSet("[:InSC=Avagraha:]"); 2584 assertTrue("some Avagraha", 16 <= ava.size() && ava.size() <= 100); 2585 assertEquals("U+08FF: Other", UCharacter.IndicSyllabicCategory.OTHER, 2586 UCharacter.getIntPropertyValue(0x08FF, UProperty.INDIC_SYLLABIC_CATEGORY)); 2587 assertEquals("U+0900: Bindu", UCharacter.IndicSyllabicCategory.BINDU, 2588 UCharacter.getIntPropertyValue(0x0900, UProperty.INDIC_SYLLABIC_CATEGORY)); 2589 assertEquals("U+11065: Brahmi_Joining_Number", UCharacter.IndicSyllabicCategory.BRAHMI_JOINING_NUMBER, 2590 UCharacter.getIntPropertyValue(0x11065, UProperty.INDIC_SYLLABIC_CATEGORY)); 2591 } 2592 2593 @Test TestVerticalOrientation()2594 public void TestVerticalOrientation() { 2595 UnicodeSet r = new UnicodeSet("[:vo=R:]"); 2596 assertTrue("mostly R", 0xc0000 <= r.size() && r.size() <= 0xd0000); 2597 UnicodeSet u = new UnicodeSet("[:vo=U:]"); 2598 assertTrue("much U", 0x40000 <= u.size() && u.size() <= 0x50000); 2599 UnicodeSet tu = new UnicodeSet("[:vo=Tu:]"); 2600 assertTrue("some Tu", 147 <= tu.size() && tu.size() <= 300); 2601 assertEquals("U+0E01: Rotated", UCharacter.VerticalOrientation.ROTATED, 2602 UCharacter.getIntPropertyValue(0x0E01, UProperty.VERTICAL_ORIENTATION)); 2603 assertEquals("U+3008: Transformed_Rotated", UCharacter.VerticalOrientation.TRANSFORMED_ROTATED, 2604 UCharacter.getIntPropertyValue(0x3008, UProperty.VERTICAL_ORIENTATION)); 2605 assertEquals("U+33333: Upright", UCharacter.VerticalOrientation.UPRIGHT, 2606 UCharacter.getIntPropertyValue(0x33333, UProperty.VERTICAL_ORIENTATION)); 2607 } 2608 2609 @Test TestIsBMP()2610 public void TestIsBMP() 2611 { 2612 int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff}; 2613 boolean flag[] = {true, false, true, false, true, false}; 2614 for (int i = 0; i < ch.length; i ++) { 2615 if (UCharacter.isBMP(ch[i]) != flag[i]) { 2616 errln("Fail: \\u" + Utility.hex(ch[i], 8) 2617 + " failed at UCharacter.isBMP"); 2618 } 2619 } 2620 } 2621 showADiffB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean expect, boolean diffIsError)2622 private boolean showADiffB(UnicodeSet a, UnicodeSet b, 2623 String a_name, String b_name, 2624 boolean expect, 2625 boolean diffIsError){ 2626 int i, start, end; 2627 boolean equal=true; 2628 for(i=0; i < a.getRangeCount(); ++i) { 2629 start = a.getRangeStart(i); 2630 end = a.getRangeEnd(i); 2631 if(expect!=b.contains(start, end)) { 2632 equal=false; 2633 while(start<=end) { 2634 if(expect!=b.contains(start)) { 2635 if(diffIsError) { 2636 if(expect) { 2637 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not"); 2638 } else { 2639 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect"); 2640 } 2641 } else { 2642 if(expect) { 2643 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not"); 2644 } else { 2645 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect"); 2646 } 2647 } 2648 } 2649 ++start; 2650 } 2651 } 2652 } 2653 return equal; 2654 } showAMinusB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2655 private boolean showAMinusB(UnicodeSet a, UnicodeSet b, 2656 String a_name, String b_name, 2657 boolean diffIsError) { 2658 2659 return showADiffB(a, b, a_name, b_name, true, diffIsError); 2660 } 2661 showAIntersectB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2662 private boolean showAIntersectB(UnicodeSet a, UnicodeSet b, 2663 String a_name, String b_name, 2664 boolean diffIsError) { 2665 return showADiffB(a, b, a_name, b_name, false, diffIsError); 2666 } 2667 compareUSets(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2668 private boolean compareUSets(UnicodeSet a, UnicodeSet b, 2669 String a_name, String b_name, 2670 boolean diffIsError) { 2671 return 2672 showAMinusB(a, b, a_name, b_name, diffIsError) && 2673 showAMinusB(b, a, b_name, a_name, diffIsError); 2674 } 2675 2676 /* various tests for consistency of UCD data and API behavior */ 2677 @Test TestConsistency()2678 public void TestConsistency() { 2679 UnicodeSet set1, set2, set3, set4; 2680 2681 int start, end; 2682 int i, length; 2683 2684 String hyphenPattern = "[:Hyphen:]"; 2685 String dashPattern = "[:Dash:]"; 2686 String lowerPattern = "[:Lowercase:]"; 2687 String formatPattern = "[:Cf:]"; 2688 String alphaPattern = "[:Alphabetic:]"; 2689 2690 /* 2691 * It used to be that UCD.html and its precursors said 2692 * "Those dashes used to mark connections between pieces of words, 2693 * plus the Katakana middle dot." 2694 * 2695 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash 2696 * but not from Hyphen. 2697 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html. 2698 * Therefore, do not show errors when testing the Hyphen property. 2699 */ 2700 logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" 2701 + "known to the UTC and not considered errors.\n"); 2702 2703 set1=new UnicodeSet(hyphenPattern); 2704 set2=new UnicodeSet(dashPattern); 2705 2706 /* remove the Katakana middle dot(s) from set1 */ 2707 set1.remove(0x30fb); 2708 set2.remove (0xff65); /* halfwidth variant */ 2709 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false); 2710 2711 2712 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ 2713 set3=new UnicodeSet(formatPattern); 2714 set4=new UnicodeSet(alphaPattern); 2715 2716 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false); 2717 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true); 2718 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true); 2719 /* 2720 * Check that each lowercase character has "small" in its name 2721 * and not "capital". 2722 * There are some such characters, some of which seem odd. 2723 * Use the verbose flag to see these notices. 2724 */ 2725 set1=new UnicodeSet(lowerPattern); 2726 2727 for(i=0;; ++i) { 2728 // try{ 2729 // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode); 2730 // }catch(Exception e){ 2731 // break; 2732 // } 2733 start = set1.getRangeStart(i); 2734 end = set1.getRangeEnd(i); 2735 length = i<set1.getRangeCount() ? set1.getRangeCount() : 0; 2736 if(length!=0) { 2737 break; /* done with code points, got a string or -1 */ 2738 } 2739 2740 while(start<=end) { 2741 String name=UCharacter.getName(start); 2742 2743 if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) && 2744 name.indexOf("SMALL CAPITAL")==-1 2745 ) { 2746 logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name); 2747 } 2748 ++start; 2749 } 2750 } 2751 2752 2753 /* 2754 * Test for an example that unorm_getCanonStartSet() delivers 2755 * all characters that compose from the input one, 2756 * even in multiple steps. 2757 * For example, the set for "I" (0049) should contain both 2758 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 2759 * In general, the set for the middle such character should be a subset 2760 * of the set for the first. 2761 */ 2762 Normalizer2 norm2=Normalizer2.getNFDInstance(); 2763 set1=new UnicodeSet(); 2764 Norm2AllModes.getNFCInstance().impl. 2765 ensureCanonIterData().getCanonStartSet(0x49, set1); 2766 set2=new UnicodeSet(); 2767 2768 /* enumerate all characters that are plausible to be latin letters */ 2769 for(start=0xa0; start<0x2000; ++start) { 2770 String decomp=norm2.normalize(UTF16.valueOf(start)); 2771 if(decomp.length() > 1 && decomp.charAt(0)==0x49) { 2772 set2.add(start); 2773 } 2774 } 2775 2776 compareUSets(set1, set2, 2777 "[canon start set of 0049]", "[all c with canon decomp with 0049]", 2778 false); 2779 2780 } 2781 2782 @Test 2783 public void TestCoverage() { 2784 //cover forDigit 2785 char ch1 = UCharacter.forDigit(7, 11); 2786 assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1)); 2787 char ch2 = UCharacter.forDigit(17, 20); 2788 assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2)); 2789 2790 //Jitterbug 4451, for coverage 2791 for (int i = 0x0041; i < 0x005B; i++) { 2792 if (!UCharacter.isJavaLetter(i)) 2793 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 2794 if (!UCharacter.isJavaIdentifierStart(i)) 2795 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character"); 2796 if (!UCharacter.isJavaLetterOrDigit(i)) 2797 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter"); 2798 if (!UCharacter.isJavaIdentifierPart(i)) 2799 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character"); 2800 } 2801 char[] spaces = {'\t','\n','\f','\r',' '}; 2802 for (int i = 0; i < spaces.length; i++){ 2803 if (!UCharacter.isSpace(spaces[i])) 2804 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space"); 2805 } 2806 } 2807 2808 @Test 2809 public void TestBlockData() 2810 { 2811 Class ubc = UCharacter.UnicodeBlock.class; 2812 2813 for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) { 2814 UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b); 2815 int id = blk.getID(); 2816 String name = blk.toString(); 2817 2818 if (id != b) { 2819 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id); 2820 } 2821 2822 try { 2823 if (ubc.getField(name + "_ID").getInt(blk) != b) { 2824 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name + 2825 " which does not match the block id."); 2826 } 2827 } catch (Exception e) { 2828 errln("Couldn't get the id name for id " + b); 2829 } 2830 } 2831 } 2832 2833 /* 2834 * The following method tests 2835 * public static UnicodeBlock getInstance(int id) 2836 */ 2837 @Test 2838 public void TestGetInstance(){ 2839 // Testing values for invalid and valid ID 2840 int[] invalid_test = {-1,-10,-100}; 2841 for(int i=0; i< invalid_test.length; i++){ 2842 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.getInstance(invalid_test[i])){ 2843 errln("UCharacter.UnicodeBlock.getInstance(invalid_test[i]) was " + 2844 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2845 UCharacter.UnicodeBlock.getInstance(invalid_test[i]) + ". Expected " + 2846 UCharacter.UnicodeBlock.INVALID_CODE); 2847 } 2848 } 2849 } 2850 2851 /* 2852 * The following method tests 2853 * public static UnicodeBlock of(int ch) 2854 */ 2855 @Test 2856 public void TestOf(){ 2857 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1)){ 2858 errln("UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) was " + 2859 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2860 UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) + ". Expected " + 2861 UCharacter.UnicodeBlock.INVALID_CODE); 2862 } 2863 } 2864 2865 /* 2866 * The following method tests 2867 * public static final UnicodeBlock forName(String blockName) 2868 */ 2869 @Test 2870 public void TestForName(){ 2871 //UCharacter.UnicodeBlock.forName(""); 2872 //Tests when "if (b == null)" is true 2873 } 2874 2875 /* 2876 * The following method tests 2877 * public static int getNumericValue(int ch) 2878 */ 2879 @Test 2880 public void TestGetNumericValue(){ 2881 // The following tests the else statement when 2882 // if(numericType<NumericType.COUNT) is false 2883 // The following values were obtained by testing all values from 2884 // UTF16.CODEPOINT_MIN_VALUE to UTF16.CODEPOINT_MAX_VALUE inclusively 2885 // to obtain the value to go through the else statement. 2886 int[] valid_values = 2887 {3058,3442,4988,8558,8559,8574,8575,8576,8577,8578,8583,8584,19975, 2888 20159,20191,20740,20806,21315,33836,38433,65819,65820,65821,65822, 2889 65823,65824,65825,65826,65827,65828,65829,65830,65831,65832,65833, 2890 65834,65835,65836,65837,65838,65839,65840,65841,65842,65843,65861, 2891 65862,65863,65868,65869,65870,65875,65876,65877,65878,65899,65900, 2892 65901,65902,65903,65904,65905,65906,66378,68167}; 2893 2894 int[] results = 2895 {1000,1000,10000,500,1000,500,1000,1000,5000,10000,50000,100000, 2896 10000,100000000,1000,100000000,-2,1000,10000,1000,300,400,500, 2897 600,700,800,900,1000,2000,3000,4000,5000,6000,7000,8000,9000, 2898 10000,20000,30000,40000,50000,60000,70000,80000,90000,500,5000, 2899 50000,500,1000,5000,500,1000,10000,50000,300,500,500,500,500,500, 2900 1000,5000,900,1000}; 2901 2902 if(valid_values.length != results.length){ 2903 errln("The valid_values array and the results array need to be "+ 2904 "the same length."); 2905 } else { 2906 for(int i = 0; i < valid_values.length; i++){ 2907 try{ 2908 if(UCharacter.getNumericValue(valid_values[i]) != results[i]){ 2909 errln("UCharacter.getNumericValue(i) returned a " + 2910 "different value from the expected result. " + 2911 "Got " + UCharacter.getNumericValue(valid_values[i]) + 2912 "Expected" + results[i]); 2913 } 2914 } catch(Exception e){ 2915 errln("UCharacter.getNumericValue(int) returned an exception " + 2916 "with the parameter value"); 2917 } 2918 } 2919 } 2920 } 2921 2922 /* 2923 * The following method tests 2924 * public static double getUnicodeNumericValue(int ch) 2925 */ 2926 // The following tests covers if(mant==0), else if(mant > 9), and default 2927 @Test 2928 public void TestGetUnicodeNumericValue(){ 2929 /* The code coverage for if(mant==0), else if(mant > 9), and default 2930 * could not be covered even with input values from UTF16.CODEPOINT_MIN_VALUE 2931 * to UTF16.CODEPOINT_MAX_VALUE. I also tested from UTF16.CODEPOINT_MAX_VALUE to 2932 * Integer.MAX_VALUE and didn't recieve any code coverage there too. 2933 * Therefore, the code could either be dead code or meaningless. 2934 */ 2935 } 2936 2937 /* 2938 * The following method tests 2939 * public static String toString(int ch) 2940 */ 2941 @Test 2942 public void TestToString(){ 2943 int[] valid_tests = { 2944 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2945 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2946 int[] invalid_tests = { 2947 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2948 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2949 2950 for(int i=0; i< valid_tests.length; i++){ 2951 if(UCharacter.toString(valid_tests[i]) == null){ 2952 errln("UCharacter.toString(int) was not suppose to return " + 2953 "null because it was given a valid parameter. Value passed: " + 2954 valid_tests[i] + ". Got null."); 2955 } 2956 } 2957 2958 for(int i=0; i< invalid_tests.length; i++){ 2959 if(UCharacter.toString(invalid_tests[i]) != null){ 2960 errln("UCharacter.toString(int) was suppose to return " + 2961 "null because it was given an invalid parameter. Value passed: " + 2962 invalid_tests[i] + ". Got: " + UCharacter.toString(invalid_tests[i])); 2963 } 2964 } 2965 } 2966 2967 /* 2968 * The following method tests 2969 * public static int getCombiningClass(int ch) 2970 */ 2971 @Test 2972 public void TestGetCombiningClass(){ 2973 int[] valid_tests = { 2974 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2975 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2976 int[] invalid_tests = { 2977 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2978 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2979 2980 for(int i=0; i< valid_tests.length; i++){ 2981 try{ 2982 UCharacter.getCombiningClass(valid_tests[i]); 2983 } catch(Exception e){ 2984 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2985 "an exception. Value passed: " + valid_tests[i]); 2986 } 2987 } 2988 2989 for(int i=0; i< invalid_tests.length; i++){ 2990 try{ 2991 assertEquals("getCombiningClass(out of range)", 2992 0, UCharacter.getCombiningClass(invalid_tests[i])); 2993 } catch(Exception e){ 2994 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2995 "an exception. Value passed: " + invalid_tests[i]); 2996 } 2997 } 2998 } 2999 3000 /* 3001 * The following method tests 3002 * public static String getName(int ch) 3003 */ 3004 @Test 3005 public void TestGetName(){ 3006 // Need to test on other "one characters" for the getName() method 3007 String[] data = {"a","z"}; 3008 String[] results = {"LATIN SMALL LETTER A","LATIN SMALL LETTER Z"}; 3009 if(data.length != results.length){ 3010 errln("The data array and the results array need to be "+ 3011 "the same length."); 3012 } else { 3013 for(int i=0; i < data.length; i++){ 3014 if(UCharacter.getName(data[i], "").compareTo(results[i]) != 0){ 3015 errln("UCharacter.getName(String, String) was suppose " + 3016 "to have the same result for the data in the parameter. " + 3017 "Value passed: " + data[i] + ". Got: " + 3018 UCharacter.getName(data[i], "") + ". Expected: " + 3019 results[i]); 3020 } 3021 } 3022 } 3023 } 3024 3025 /* 3026 * The following method tests 3027 * public static String getISOComment(int ch) 3028 */ 3029 @Test 3030 public void TestGetISOComment(){ 3031 int[] invalid_tests = { 3032 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 3033 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 3034 3035 for(int i=0; i< invalid_tests.length; i++){ 3036 if(UCharacter.getISOComment(invalid_tests[i]) != null){ 3037 errln("UCharacter.getISOComment(int) was suppose to return " + 3038 "null because it was given an invalid parameter. Value passed: " + 3039 invalid_tests[i] + ". Got: " + UCharacter.getISOComment(invalid_tests[i])); 3040 } 3041 } 3042 } 3043 3044 /* 3045 * The following method tests 3046 * public void setLimit(int lim) 3047 */ 3048 @Test 3049 public void TestSetLimit(){ 3050 // TODO: Tests when "if(0<=lim && lim<=s.length())" is false 3051 } 3052 3053 /* 3054 * The following method tests 3055 * public int nextCaseMapCP() 3056 */ 3057 @Test 3058 public void TestNextCaseMapCP(){ 3059 // TODO: Tests when "if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 3060 /* TODO: Tests when "if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit && 3061 * UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 3062 */ 3063 } 3064 3065 /* 3066 * The following method tests 3067 * public void reset(int direction) 3068 */ 3069 @Test 3070 public void TestReset(){ 3071 // The method reset() is never called by another function 3072 // TODO: Tests when "else if(direction<0)" is false 3073 } 3074 3075 /* 3076 * The following methods test 3077 * public static String toTitleCase(Locale locale, String str, BreakIterator breakiter) 3078 */ 3079 @Test 3080 public void TestToTitleCaseCoverage(){ 3081 //Calls the function "toTitleCase(Locale locale, String str, BreakIterator breakiter)" 3082 String[] locale={"en","fr","zh","ko","ja","it","de",""}; 3083 for(int i=0; i<locale.length; i++){ 3084 UCharacter.toTitleCase(new Locale(locale[i]), "", null); 3085 } 3086 3087 // Calls the function "String toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)" 3088 // Tests when "if (locale == null)" is true 3089 UCharacter.toTitleCase((ULocale)null, "", null, 0); 3090 3091 // TODO: Tests when "if(index==BreakIterator.DONE || index>srcLength)" is true 3092 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c))" is false 3093 // TODO: Tests when "if(prev<titleStart)" is false 3094 // TODO: Tests when "if(c<=0xffff)" is false 3095 // TODO: Tests when "if(c<=0xffff)" is false 3096 // TODO: Tests when "if(titleLimit<index)" is false 3097 // TODO: Tests when "else if((nc=iter.nextCaseMapCP())>=0)" is false 3098 } 3099 3100 @Test 3101 public void testToTitleCase_Locale_String_BreakIterator_I() { 3102 String titleCase = UCharacter.toTitleCase(new Locale("nl"), "ijsland", null, 3103 UCharacter.FOLD_CASE_DEFAULT); 3104 assertEquals("Wrong title casing", "IJsland", titleCase); 3105 } 3106 3107 @Test 3108 public void testToTitleCase_String_BreakIterator_en() { 3109 String titleCase = UCharacter.toTitleCase(new Locale("en"), "ijsland", null); 3110 assertEquals("Wrong title casing", "Ijsland", titleCase); 3111 } 3112 /* 3113 * The following method tests 3114 * public static String toUpperCase(ULocale locale, String str) 3115 */ 3116 @Test 3117 public void TestToUpperCase(){ 3118 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 3119 } 3120 3121 /* 3122 * The following method tests 3123 * public static String toLowerCase(ULocale locale, String str) 3124 */ 3125 @Test 3126 public void TestToLowerCase(){ 3127 // Test when locale is null 3128 String[] cases = {"","a","A","z","Z","Dummy","DUMMY","dummy","a z","A Z", 3129 "'","\"","0","9","0a","a0","*","~!@#$%^&*()_+"}; 3130 for(int i=0; i<cases.length; i++){ 3131 try{ 3132 UCharacter.toLowerCase((ULocale) null, cases[i]); 3133 } catch(Exception e){ 3134 errln("UCharacter.toLowerCase was not suppose to return an " + 3135 "exception for input of null and string: " + cases[i]); 3136 } 3137 } 3138 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 3139 } 3140 3141 /* 3142 * The following method tests 3143 * public static int getHanNumericValue(int ch) 3144 */ 3145 @Test 3146 public void TestGetHanNumericValue(){ 3147 int[] valid = { 3148 0x3007, //IDEOGRAPHIC_NUMBER_ZERO_ 3149 0x96f6, //CJK_IDEOGRAPH_COMPLEX_ZERO_ 3150 0x4e00, //CJK_IDEOGRAPH_FIRST_ 3151 0x58f9, //CJK_IDEOGRAPH_COMPLEX_ONE_ 3152 0x4e8c, //CJK_IDEOGRAPH_SECOND_ 3153 0x8cb3, //CJK_IDEOGRAPH_COMPLEX_TWO_ 3154 0x4e09, //CJK_IDEOGRAPH_THIRD_ 3155 0x53c3, //CJK_IDEOGRAPH_COMPLEX_THREE_ 3156 0x56db, //CJK_IDEOGRAPH_FOURTH_ 3157 0x8086, //CJK_IDEOGRAPH_COMPLEX_FOUR_ 3158 0x4e94, //CJK_IDEOGRAPH_FIFTH_ 3159 0x4f0d, //CJK_IDEOGRAPH_COMPLEX_FIVE_ 3160 0x516d, //CJK_IDEOGRAPH_SIXTH_ 3161 0x9678, //CJK_IDEOGRAPH_COMPLEX_SIX_ 3162 0x4e03, //CJK_IDEOGRAPH_SEVENTH_ 3163 0x67d2, //CJK_IDEOGRAPH_COMPLEX_SEVEN_ 3164 0x516b, //CJK_IDEOGRAPH_EIGHTH_ 3165 0x634c, //CJK_IDEOGRAPH_COMPLEX_EIGHT_ 3166 0x4e5d, //CJK_IDEOGRAPH_NINETH_ 3167 0x7396, //CJK_IDEOGRAPH_COMPLEX_NINE_ 3168 0x5341, //CJK_IDEOGRAPH_TEN_ 3169 0x62fe, //CJK_IDEOGRAPH_COMPLEX_TEN_ 3170 0x767e, //CJK_IDEOGRAPH_HUNDRED_ 3171 0x4f70, //CJK_IDEOGRAPH_COMPLEX_HUNDRED_ 3172 0x5343, //CJK_IDEOGRAPH_THOUSAND_ 3173 0x4edf, //CJK_IDEOGRAPH_COMPLEX_THOUSAND_ 3174 0x824c, //CJK_IDEOGRAPH_TEN_THOUSAND_ 3175 0x5104, //CJK_IDEOGRAPH_HUNDRED_MILLION_ 3176 }; 3177 3178 int[] invalid = {-5,-2,-1,0}; 3179 3180 int[] results = {0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,100,100, 3181 1000,1000,10000,100000000}; 3182 3183 if(valid.length != results.length){ 3184 errln("The arrays valid and results are suppose to be the same length " + 3185 "to test getHanNumericValue(int ch)."); 3186 } else{ 3187 for(int i=0; i<valid.length; i++){ 3188 if(UCharacter.getHanNumericValue(valid[i]) != results[i]){ 3189 errln("UCharacter.getHanNumericValue does not return the " + 3190 "same result as expected. Passed value: " + valid[i] + 3191 ". Got: " + UCharacter.getHanNumericValue(valid[i]) + 3192 ". Expected: " + results[i]); 3193 } 3194 } 3195 } 3196 3197 for(int i=0; i<invalid.length; i++){ 3198 if(UCharacter.getHanNumericValue(invalid[i]) != -1){ 3199 errln("UCharacter.getHanNumericValue does not return the " + 3200 "same result as expected. Passed value: " + invalid[i] + 3201 ". Got: " + UCharacter.getHanNumericValue(invalid[i]) + 3202 ". Expected: -1"); 3203 } 3204 } 3205 } 3206 3207 /* 3208 * The following method tests 3209 * public static boolean hasBinaryProperty(int ch, int property) 3210 */ 3211 @Test 3212 public void TestHasBinaryProperty(){ 3213 // Testing when "if (ch < MIN_VALUE || ch > MAX_VALUE)" is true 3214 int[] invalid = { 3215 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 3216 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 3217 int[] valid = { 3218 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 3219 UCharacter.MAX_VALUE, UCharacter.MAX_VALUE-1}; 3220 3221 for(int i=0; i<invalid.length; i++){ 3222 try{ 3223 if (UCharacter.hasBinaryProperty(invalid[i], 1)) { 3224 errln("UCharacter.hasBinaryProperty(ch, property) should return " + 3225 "false for out-of-range code points but " + 3226 "returns true for " + invalid[i]); 3227 } 3228 } catch(Exception e) { 3229 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3230 "throw an exception for any input. Value passed: " + 3231 invalid[i]); 3232 } 3233 } 3234 3235 for(int i=0; i<valid.length; i++){ 3236 try{ 3237 UCharacter.hasBinaryProperty(valid[i], 1); 3238 } catch(Exception e) { 3239 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3240 "throw an exception for any input. Value passed: " + 3241 valid[i]); 3242 } 3243 } 3244 } 3245 3246 /* 3247 * The following method tests 3248 * public static int getIntPropertyValue(int ch, int type) 3249 */ 3250 @Test 3251 public void TestGetIntPropertyValue(){ 3252 /* Testing UCharacter.getIntPropertyValue(ch, type) */ 3253 // Testing when "if (type < UProperty.BINARY_START)" is true 3254 int[] negative_cases = {-100,-50,-10,-5,-2,-1}; 3255 for(int i=0; i<negative_cases.length; i++){ 3256 if(UCharacter.getIntPropertyValue(0, negative_cases[i]) != 0){ 3257 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3258 "when passing a negative value of " + negative_cases[i]); 3259 3260 } 3261 } 3262 3263 // Testing when "if(ch<NormalizerImpl.JAMO_L_BASE)" is true 3264 for(int i=Normalizer2Impl.Hangul.JAMO_L_BASE-5; i<Normalizer2Impl.Hangul.JAMO_L_BASE; i++){ 3265 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3266 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3267 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3268 3269 } 3270 } 3271 3272 // Testing when "else if((ch-=NormalizerImpl.HANGUL_BASE)<0)" is true 3273 for(int i=Normalizer2Impl.Hangul.HANGUL_BASE-5; i<Normalizer2Impl.Hangul.HANGUL_BASE; i++){ 3274 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3275 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3276 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3277 3278 } 3279 } 3280 } 3281 3282 /* 3283 * The following method tests 3284 * public static int getIntPropertyMaxValue(int type) 3285 */ 3286 @Test 3287 public void TestGetIntPropertyMaxValue(){ 3288 /* Testing UCharacter.getIntPropertyMaxValue(type) */ 3289 // Testing when "else if (type < UProperty.INT_START)" is true 3290 int[] cases = {UProperty.BINARY_LIMIT, UProperty.BINARY_LIMIT+1, 3291 UProperty.INT_START-2, UProperty.INT_START-1}; 3292 for(int i=0; i<cases.length; i++){ 3293 if(UCharacter.getIntPropertyMaxValue(cases[i]) != -1){ 3294 errln("UCharacter.getIntPropertyMaxValue was suppose to return -1 " + 3295 "but got " + UCharacter.getIntPropertyMaxValue(cases[i])); 3296 } 3297 } 3298 3299 // TODO: Testing when the case statment reaches "default" 3300 // After testing between values of UProperty.INT_START and 3301 // UProperty.INT_LIMIT are covered, none of the values reaches default. 3302 } 3303 3304 /* 3305 * The following method tests 3306 * public static final int codePointAt(CharSequence seq, int index) 3307 * public static final int codePointAt(char[] text, int index, int limit) 3308 */ 3309 @Test 3310 public void TestCodePointAt(){ 3311 3312 // {LEAD_SURROGATE_MIN_VALUE, 3313 // LEAD_SURROGATE_MAX_VALUE, LEAD_SURROGATE_MAX_VALUE-1 3314 String[] cases = {"\uD800","\uDBFF","\uDBFE"}; 3315 int[] result = {55296,56319,56318}; 3316 for(int i=0; i < cases.length; i++){ 3317 /* Testing UCharacter.codePointAt(seq, index) */ 3318 // Testing when "if (index < seq.length())" is false 3319 if(UCharacter.codePointAt(cases[i], 0) != result[i]) 3320 errln("UCharacter.codePointAt(CharSequence ...) did not return as expected. " + 3321 "Passed value: " + cases[i] + ". Expected: " + 3322 result[i] + ". Got: " + 3323 UCharacter.codePointAt(cases[i], 0)); 3324 3325 /* Testing UCharacter.codePointAt(text, index) */ 3326 // Testing when "if (index < text.length)" is false 3327 if(UCharacter.codePointAt(cases[i].toCharArray(), 0) != result[i]) 3328 errln("UCharacter.codePointAt(char[] ...) did not return as expected. " + 3329 "Passed value: " + cases[i] + ". Expected: " + 3330 result[i] + ". Got: " + 3331 UCharacter.codePointAt(cases[i].toCharArray(), 0)); 3332 3333 /* Testing UCharacter.codePointAt(text, index, limit) */ 3334 // Testing when "if (index < limit)" is false 3335 if(UCharacter.codePointAt(cases[i].toCharArray(), 0, 1) != result[i]) 3336 errln("UCharacter.codePointAt(char[], int, int) did not return as expected. " + 3337 "Passed value: " + cases[i] + ". Expected: " + 3338 result[i] + ". Got: " + 3339 UCharacter.codePointAt(cases[i].toCharArray(), 0, 1)); 3340 } 3341 3342 /* Testing UCharacter.codePointAt(text, index, limit) */ 3343 // Testing when "if (index >= limit || limit > text.length)" is true 3344 char[] empty_text = {}; 3345 char[] one_char_text = {'a'}; 3346 char[] reg_text = {'d','u','m','m','y'}; 3347 int[] limitCases = {2,3,5,10,25}; 3348 3349 // When index >= limit 3350 for(int i=0; i < limitCases.length; i++){ 3351 try{ 3352 UCharacter.codePointAt(reg_text, 100, limitCases[i]); 3353 errln("UCharacter.codePointAt was suppose to return an exception " + 3354 "but got " + UCharacter.codePointAt(reg_text, 100, limitCases[i]) + 3355 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3356 100 + ", Limit: " + limitCases[i] + "."); 3357 } catch(Exception e){ 3358 } 3359 } 3360 3361 // When limit > text.length 3362 for(int i=0; i < limitCases.length; i++){ 3363 try{ 3364 UCharacter.codePointAt(empty_text, 0, limitCases[i]); 3365 errln("UCharacter.codePointAt was suppose to return an exception " + 3366 "but got " + UCharacter.codePointAt(empty_text, 0, limitCases[i]) + 3367 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3368 0 + ", Limit: " + limitCases[i] + "."); 3369 } catch(Exception e){ 3370 } 3371 3372 try{ 3373 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3374 errln("UCharacter.codePointCount was suppose to return an exception " + 3375 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3376 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3377 0 + ", Limit: " + limitCases[i] + "."); 3378 } catch(Exception e){ 3379 } 3380 } 3381 } 3382 3383 /* 3384 * The following method tests 3385 * public static final int codePointBefore(CharSequence seq, int index) 3386 * public static final int codePointBefore(char[] text, int index) 3387 * public static final int codePointBefore(char[] text, int index, int limit) 3388 */ 3389 @Test 3390 public void TestCodePointBefore(){ 3391 // {TRAIL_SURROGATE_MIN_VALUE, 3392 // TRAIL_SURROGATE_MAX_VALUE, TRAIL_SURROGATE_MAX_VALUE -1 3393 String[] cases = {"\uDC00","\uDFFF","\uDDFE"}; 3394 int[] result = {56320,57343,56830}; 3395 for(int i=0; i < cases.length; i++){ 3396 /* Testing UCharacter.codePointBefore(seq, index) */ 3397 // Testing when "if (index > 0)" is false 3398 if(UCharacter.codePointBefore(cases[i], 1) != result[i]) 3399 errln("UCharacter.codePointBefore(CharSequence ...) did not return as expected. " + 3400 "Passed value: " + cases[i] + ". Expected: " + 3401 result[i] + ". Got: " + 3402 UCharacter.codePointBefore(cases[i], 1)); 3403 3404 /* Testing UCharacter.codePointBefore(text, index) */ 3405 // Testing when "if (index > 0)" is false 3406 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1) != result[i]) 3407 errln("UCharacter.codePointBefore(char[] ...) did not return as expected. " + 3408 "Passed value: " + cases[i] + ". Expected: " + 3409 result[i] + ". Got: " + 3410 UCharacter.codePointBefore(cases[i].toCharArray(), 1)); 3411 3412 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3413 // Testing when "if (index > limit)" is false 3414 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0) != result[i]) 3415 errln("UCharacter.codePointBefore(char[], int, int) did not return as expected. " + 3416 "Passed value: " + cases[i] + ". Expected: " + 3417 result[i] + ". Got: " + 3418 UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0)); 3419 } 3420 3421 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3422 char[] dummy = {'d','u','m','m','y'}; 3423 // Testing when "if (index <= limit || limit < 0)" is true 3424 int[] negative_cases = {-100,-10,-5,-2,-1}; 3425 int[] index_cases = {0,1,2,5,10,100}; 3426 3427 for(int i=0; i < negative_cases.length; i++){ 3428 try{ 3429 UCharacter.codePointBefore(dummy, 10000, negative_cases[i]); 3430 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3431 "when the parameter limit of " + negative_cases[i] + " is a negative number."); 3432 } catch(Exception e) {} 3433 } 3434 3435 for(int i=0; i < index_cases.length; i++){ 3436 try{ 3437 UCharacter.codePointBefore(dummy, index_cases[i], 101); 3438 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3439 "when the parameter index of " + index_cases[i] + " is a negative number."); 3440 } catch(Exception e) {} 3441 } 3442 } 3443 3444 /* 3445 * The following method tests 3446 * public static final int toChars(int cp, char[] dst, int dstIndex) 3447 * public static final char[] toChars(int cp) 3448 */ 3449 @Test 3450 public void TestToChars(){ 3451 int[] positive_cases = {1,2,5,10,100}; 3452 char[] dst = {'a'}; 3453 3454 /* Testing UCharacter.toChars(cp, dst, dstIndex) */ 3455 for(int i=0; i < positive_cases.length; i++){ 3456 // Testing negative values when cp < 0 for if (cp >= 0) 3457 try{ 3458 UCharacter.toChars(-1*positive_cases[i],dst,0); 3459 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3460 "when the parameter " + (-1*positive_cases[i]) + " is a negative number."); 3461 } catch(Exception e){ 3462 } 3463 3464 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3465 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0) != 1){ 3466 errln("UCharacter.toChars(int,char[],int) was suppose to return a value of 1. Got: " + 3467 UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0)); 3468 } 3469 3470 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3471 // when "if (cp <= MAX_CODE_POINT)" is false 3472 try{ 3473 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i],dst,0); 3474 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3475 "when the parameter " + (UCharacter.MAX_CODE_POINT+positive_cases[i]) + 3476 " is a large number."); 3477 } catch(Exception e){ 3478 } 3479 } 3480 3481 3482 /* Testing UCharacter.toChars(cp)*/ 3483 for(int i=0; i<positive_cases.length; i++){ 3484 // Testing negative values when cp < 0 for if (cp >= 0) 3485 try{ 3486 UCharacter.toChars(-1*positive_cases[i]); 3487 errln("UCharacter.toChars(cint) was suppose to return an exception " + 3488 "when the parameter " + positive_cases[i] + " is a negative number."); 3489 } catch(Exception e){ 3490 } 3491 3492 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3493 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]).length <= 0){ 3494 errln("UCharacter.toChars(int) was suppose to return some result result when the parameter " + 3495 (UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]) + "is passed."); 3496 } 3497 3498 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3499 // when "if (cp <= MAX_CODE_POINT)" is false 3500 try{ 3501 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i]); 3502 errln("UCharacter.toChars(int) was suppose to return an exception " + 3503 "when the parameter " + positive_cases[i] + " is a large number."); 3504 } catch(Exception e){ 3505 } 3506 } 3507 } 3508 3509 /* 3510 * The following method tests 3511 * public static int codePointCount(CharSequence text, int start, int limit) 3512 * public static int codePointCount(char[] text, int start, int limit) 3513 */ 3514 @Test 3515 public void TestCodePointCount(){ 3516 // The following tests the first if statement to make it true: 3517 // if (start < 0 || limit < start || limit > text.length) 3518 // which will throw an exception. 3519 char[] empty_text = {}; 3520 char[] one_char_text = {'a'}; 3521 char[] reg_text = {'d','u','m','m','y'}; 3522 int[] invalid_startCases = {-1,-2,-5,-10,-100}; 3523 int[] limitCases = {2,3,5,10,25}; 3524 3525 // When start < 0 3526 for(int i=0; i < invalid_startCases.length; i++){ 3527 try{ 3528 UCharacter.codePointCount(reg_text, invalid_startCases[i], 1); 3529 errln("UCharacter.codePointCount was suppose to return an exception " + 3530 "but got " + UCharacter.codePointCount(reg_text, invalid_startCases[i], 1) + 3531 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3532 invalid_startCases[i] + ", Limit: " + 1 + "."); 3533 } catch(Exception e){ 3534 } 3535 } 3536 3537 // When limit < start 3538 for(int i=0; i < limitCases.length; i++){ 3539 try{ 3540 UCharacter.codePointCount(reg_text, 100, limitCases[i]); 3541 errln("UCharacter.codePointCount was suppose to return an exception " + 3542 "but got " + UCharacter.codePointCount(reg_text, 100, limitCases[i]) + 3543 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3544 100 + ", Limit: " + limitCases[i] + "."); 3545 } catch(Exception e){ 3546 } 3547 } 3548 3549 // When limit > text.length 3550 for(int i=0; i < limitCases.length; i++){ 3551 try{ 3552 UCharacter.codePointCount(empty_text, 0, limitCases[i]); 3553 errln("UCharacter.codePointCount was suppose to return an exception " + 3554 "but got " + UCharacter.codePointCount(empty_text, 0, limitCases[i]) + 3555 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3556 0 + ", Limit: " + limitCases[i] + "."); 3557 } catch(Exception e){ 3558 } 3559 3560 try{ 3561 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3562 errln("UCharacter.codePointCount was suppose to return an exception " + 3563 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3564 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3565 0 + ", Limit: " + limitCases[i] + "."); 3566 } catch(Exception e){ 3567 } 3568 } 3569 } 3570 3571 /* 3572 * The following method tests 3573 * private static int getEuropeanDigit(int ch) 3574 * The method needs to use the method "digit" in order to access the 3575 * getEuropeanDigit method. 3576 */ 3577 @Test 3578 public void TestGetEuropeanDigit(){ 3579 //The number retrieved from 0xFF41 to 0xFF5A is due to 3580 // exhaustive testing from UTF16.CODEPOINT_MIN_VALUE to 3581 // UTF16.CODEPOINT_MAX_VALUE return a value of -1. 3582 3583 int[] radixResult = { 3584 10,11,12,13,14,15,16,17,18,19,20,21,22, 3585 23,24,25,26,27,28,29,30,31,32,33,34,35}; 3586 // Invalid and too-small-for-these-digits radix values. 3587 int[] radixCase1 = {0,1,5,10,100}; 3588 // Radix values that work for at least some of the "digits". 3589 int[] radixCase2 = {12,16,20,36}; 3590 3591 for(int i=0xFF41; i<=0xFF5A; i++){ 3592 for(int j=0; j < radixCase1.length; j++){ 3593 if(UCharacter.digit(i, radixCase1[j]) != -1){ 3594 errln("UCharacter.digit(int,int) was supposed to return -1 for radix " + radixCase1[j] 3595 + ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + UCharacter.digit(i, radixCase1[j])); 3596 } 3597 } 3598 for(int j=0; j < radixCase2.length; j++){ 3599 int radix = radixCase2[j]; 3600 int expected = (radixResult[i-0xFF41] < radix) ? radixResult[i-0xFF41] : -1; 3601 int actual = UCharacter.digit(i, radix); 3602 if(actual != expected){ 3603 errln("UCharacter.digit(int,int) was supposed to return " + 3604 expected + " for radix " + radix + 3605 ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + actual); 3606 break; 3607 } 3608 } 3609 } 3610 } 3611 3612 /* Tests the method 3613 * private static final int getProperty(int ch) 3614 * from public static int getType(int ch) 3615 */ 3616 @Test 3617 public void TestGetProperty(){ 3618 int[] cases = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2}; 3619 for(int i=0; i < cases.length; i++) 3620 if(UCharacter.getType(cases[i]) != 0) 3621 errln("UCharacter.getType for testing UCharacter.getProperty " 3622 + "did not return 0 for passed value of " + cases[i] + 3623 " but got " + UCharacter.getType(cases[i])); 3624 } 3625 3626 /* Tests the class 3627 * abstract public static class XSymbolTable implements SymbolTable 3628 */ 3629 @Test 3630 public void TestXSymbolTable(){ 3631 class MyXSymbolTable extends UnicodeSet.XSymbolTable {} 3632 MyXSymbolTable st = new MyXSymbolTable(); 3633 3634 // Tests "public UnicodeMatcher lookupMatcher(int i)" 3635 if(st.lookupMatcher(0) != null) 3636 errln("XSymbolTable.lookupMatcher(int i) was suppose to return null."); 3637 3638 // Tests "public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)" 3639 if(st.applyPropertyAlias("", "", new UnicodeSet()) != false) 3640 errln("XSymbolTable.applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) was suppose to return false."); 3641 3642 // Tests "public char[] lookup(String s)" 3643 if(st.lookup("") != null) 3644 errln("XSymbolTable.lookup(String s) was suppose to return null."); 3645 3646 // Tests "public String parseReference(String text, ParsePosition pos, int limit)" 3647 if(st.parseReference("", null, 0) != null) 3648 errln("XSymbolTable.parseReference(String text, ParsePosition pos, int limit) was suppose to return null."); 3649 } 3650 3651 /* Tests the method 3652 * public boolean isFrozen() 3653 */ 3654 @Test 3655 public void TestIsFrozen(){ 3656 UnicodeSet us = new UnicodeSet(); 3657 if(us.isFrozen() != false) 3658 errln("Unicode.isFrozen() was suppose to return false."); 3659 3660 us.freeze(); 3661 if(us.isFrozen() != true) 3662 errln("Unicode.isFrozen() was suppose to return true."); 3663 } 3664 3665 /* Tests the methods 3666 * public static String getNameAlias() and 3667 * public static String getCharFromNameAlias() 3668 */ 3669 @Test 3670 public void testNameAliasing() { 3671 int input = '\u01a2'; 3672 String alias = UCharacter.getNameAlias(input); 3673 assertEquals("Wrong name alias", "LATIN CAPITAL LETTER GHA", alias); 3674 int output = UCharacter.getCharFromNameAlias(alias); 3675 assertEquals("alias for '" + input + "'", input, output); 3676 } 3677 3678 @Test 3679 public void TestBinaryCharacterProperties() { 3680 try { 3681 CharacterProperties.getBinaryPropertySet(-1); 3682 fail("getBinaryPropertySet(-1) did not throw an exception"); 3683 CharacterProperties.getBinaryPropertySet(UProperty.BINARY_LIMIT); 3684 fail("getBinaryPropertySet(BINARY_LIMIT) did not throw an exception"); 3685 } catch(Exception expected) { 3686 } 3687 // Spot-check getBinaryPropertySet() vs. hasBinaryProperty(). 3688 for (int prop = 0; prop < UProperty.BINARY_LIMIT; ++prop) { 3689 UnicodeSet set = CharacterProperties.getBinaryPropertySet(prop); 3690 int size = set.size(); 3691 if (size == 0) { 3692 assertFalse("!hasBinaryProperty(U+0020, " + prop + ')', 3693 UCharacter.hasBinaryProperty(0x20, prop)); 3694 assertFalse("!hasBinaryProperty(U+0061, " + prop + ')', 3695 UCharacter.hasBinaryProperty(0x61, prop)); 3696 assertFalse("!hasBinaryProperty(U+4E00, " + prop + ')', 3697 UCharacter.hasBinaryProperty(0x4e00, prop)); 3698 } else { 3699 int c = set.charAt(0); 3700 if (c > 0) { 3701 assertFalse("!hasBinaryProperty(" + Utility.hex(c - 1) + ", " + prop + ')', 3702 UCharacter.hasBinaryProperty(c - 1, prop)); 3703 } 3704 assertTrue("hasBinaryProperty(" + Utility.hex(c) + ", " + prop + ')', 3705 UCharacter.hasBinaryProperty(c, prop)); 3706 c = set.charAt(size - 1); 3707 assertTrue("hasBinaryProperty(" + Utility.hex(c) + ", " + prop + ')', 3708 UCharacter.hasBinaryProperty(c, prop)); 3709 if (c < 0x10ffff) { 3710 assertFalse("!hasBinaryProperty(" + Utility.hex(c + 1) + ", " + prop + ')', 3711 UCharacter.hasBinaryProperty(c + 1, prop)); 3712 } 3713 } 3714 } 3715 } 3716 3717 @Test 3718 public void TestIntCharacterProperties() { 3719 try { 3720 CharacterProperties.getIntPropertyMap(UProperty.INT_START - 1); 3721 fail("getIntPropertyMap(INT_START-1) did not throw an exception"); 3722 CharacterProperties.getIntPropertyMap(UProperty.INT_LIMIT); 3723 fail("getIntPropertyMap(INT_LIMIT) did not throw an exception"); 3724 } catch(Exception expected) { 3725 } 3726 // Spot-check getIntPropertyMap() vs. getIntPropertyValue(). 3727 CodePointMap.Range range = new CodePointMap.Range(); 3728 for (int prop = UProperty.INT_START; prop < UProperty.INT_LIMIT; ++prop) { 3729 CodePointMap map = CharacterProperties.getIntPropertyMap(prop); 3730 assertTrue("int property first range", map.getRange(0, null, range)); 3731 int c = (range.getStart() + range.getEnd()) / 2; 3732 assertEquals("int property first range value at " + Utility.hex(c), 3733 UCharacter.getIntPropertyValue(c, prop), range.getValue()); 3734 assertTrue("int property later range", map.getRange(0x5000, null, range)); 3735 int end = range.getEnd(); 3736 assertEquals("int property later range value at " + Utility.hex(end), 3737 UCharacter.getIntPropertyValue(end, prop), range.getValue()); 3738 } 3739 } 3740 } 3741