1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 2001-2016 International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.lang; 12 13 import java.util.BitSet; 14 import java.util.Locale; 15 16 import ohos.global.icu.impl.UCharacterProperty; 17 import ohos.global.icu.util.ULocale; 18 19 /** 20 * Constants for ISO 15924 script codes, and related functions. 21 * 22 * <p>The current set of script code constants supports at least all scripts 23 * that are encoded in the version of Unicode which ICU currently supports. 24 * The names of the constants are usually derived from the 25 * Unicode script property value aliases. 26 * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) 27 * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . 28 * 29 * <p>In addition, constants for many ISO 15924 script codes 30 * are included, for use with language tags, CLDR data, and similar. 31 * Some of those codes are not used in the Unicode Character Database (UCD). 32 * For example, there are no characters that have a UCD script property value of 33 * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. 34 * 35 * <p>Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. 36 * 37 * <p>Starting with ICU 55, script codes are only added when their scripts 38 * have been or will certainly be encoded in Unicode, 39 * and have been assigned Unicode script property value aliases, 40 * to ensure that their script names are stable and match the names of the constants. 41 * Script codes like Latf and Aran that are not subject to separate encoding 42 * may be added at any time. 43 */ 44 public final class UScript { 45 /** 46 * Invalid code 47 */ 48 public static final int INVALID_CODE = -1; 49 /** 50 * Common 51 */ 52 public static final int COMMON = 0; /* Zyyy */ 53 /** 54 * Inherited 55 */ 56 public static final int INHERITED = 1; /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ 57 /** 58 * Arabic 59 */ 60 public static final int ARABIC = 2; /* Arab */ 61 /** 62 * Armenian 63 */ 64 public static final int ARMENIAN = 3; /* Armn */ 65 /** 66 * Bengali 67 */ 68 public static final int BENGALI = 4; /* Beng */ 69 /** 70 * Bopomofo 71 */ 72 public static final int BOPOMOFO = 5; /* Bopo */ 73 /** 74 * Cherokee 75 */ 76 public static final int CHEROKEE = 6; /* Cher */ 77 /** 78 * Coptic 79 */ 80 public static final int COPTIC = 7; /* Qaac */ 81 /** 82 * Cyrillic 83 */ 84 public static final int CYRILLIC = 8; /* Cyrl (Cyrs) */ 85 /** 86 * Deseret 87 */ 88 public static final int DESERET = 9; /* Dsrt */ 89 /** 90 * Devanagari 91 */ 92 public static final int DEVANAGARI = 10; /* Deva */ 93 /** 94 * Ethiopic 95 */ 96 public static final int ETHIOPIC = 11; /* Ethi */ 97 /** 98 * Georgian 99 */ 100 public static final int GEORGIAN = 12; /* Geor (Geon; Geoa) */ 101 /** 102 * Gothic 103 */ 104 public static final int GOTHIC = 13; /* Goth */ 105 /** 106 * Greek 107 */ 108 public static final int GREEK = 14; /* Grek */ 109 /** 110 * Gujarati 111 */ 112 public static final int GUJARATI = 15; /* Gujr */ 113 /** 114 * Gurmukhi 115 */ 116 public static final int GURMUKHI = 16; /* Guru */ 117 /** 118 * Han 119 */ 120 public static final int HAN = 17; /* Hani */ 121 /** 122 * Hangul 123 */ 124 public static final int HANGUL = 18; /* Hang */ 125 /** 126 * Hebrew 127 */ 128 public static final int HEBREW = 19; /* Hebr */ 129 /** 130 * Hiragana 131 */ 132 public static final int HIRAGANA = 20; /* Hira */ 133 /** 134 * Kannada 135 */ 136 public static final int KANNADA = 21; /* Knda */ 137 /** 138 * Katakana 139 */ 140 public static final int KATAKANA = 22; /* Kana */ 141 /** 142 * Khmer 143 */ 144 public static final int KHMER = 23; /* Khmr */ 145 /** 146 * Lao 147 */ 148 public static final int LAO = 24; /* Laoo */ 149 /** 150 * Latin 151 */ 152 public static final int LATIN = 25; /* Latn (Latf; Latg) */ 153 /** 154 * Malayalam 155 */ 156 public static final int MALAYALAM = 26; /* Mlym */ 157 /** 158 * Mangolian 159 */ 160 public static final int MONGOLIAN = 27; /* Mong */ 161 /** 162 * Myammar 163 */ 164 public static final int MYANMAR = 28; /* Mymr */ 165 /** 166 * Ogham 167 */ 168 public static final int OGHAM = 29; /* Ogam */ 169 /** 170 * Old Itallic 171 */ 172 public static final int OLD_ITALIC = 30; /* Ital */ 173 /** 174 * Oriya 175 */ 176 public static final int ORIYA = 31; /* Orya */ 177 /** 178 * Runic 179 */ 180 public static final int RUNIC = 32; /* Runr */ 181 /** 182 * Sinhala 183 */ 184 public static final int SINHALA = 33; /* Sinh */ 185 /** 186 * Syriac 187 */ 188 public static final int SYRIAC = 34; /* Syrc (Syrj; Syrn; Syre) */ 189 /** 190 * Tamil 191 */ 192 public static final int TAMIL = 35; /* Taml */ 193 /** 194 * Telugu 195 */ 196 public static final int TELUGU = 36; /* Telu */ 197 /** 198 * Thana 199 */ 200 public static final int THAANA = 37; /* Thaa */ 201 /** 202 * Thai 203 */ 204 public static final int THAI = 38; /* Thai */ 205 /** 206 * Tibetan 207 */ 208 public static final int TIBETAN = 39; /* Tibt */ 209 /** 210 * Unified Canadian Aboriginal Symbols 211 */ 212 public static final int CANADIAN_ABORIGINAL = 40; /* Cans */ 213 /** 214 * Unified Canadian Aboriginal Symbols (alias) 215 */ 216 public static final int UCAS = CANADIAN_ABORIGINAL; /* Cans */ 217 /** 218 * Yi syllables 219 */ 220 public static final int YI = 41; /* Yiii */ 221 /** 222 * Tagalog 223 */ 224 public static final int TAGALOG = 42; /* Tglg */ 225 /** 226 * Hanunooo 227 */ 228 public static final int HANUNOO = 43; /* Hano */ 229 /** 230 * Buhid 231 */ 232 public static final int BUHID = 44; /* Buhd */ 233 /** 234 * Tagbanwa 235 */ 236 public static final int TAGBANWA = 45; /* Tagb */ 237 /** 238 * Braille 239 * Script in Unicode 4 240 * 241 */ 242 public static final int BRAILLE = 46; /* Brai */ 243 /** 244 * Cypriot 245 * Script in Unicode 4 246 * 247 */ 248 public static final int CYPRIOT = 47; /* Cprt */ 249 /** 250 * Limbu 251 * Script in Unicode 4 252 * 253 */ 254 public static final int LIMBU = 48; /* Limb */ 255 /** 256 * Linear B 257 * Script in Unicode 4 258 * 259 */ 260 public static final int LINEAR_B = 49; /* Linb */ 261 /** 262 * Osmanya 263 * Script in Unicode 4 264 * 265 */ 266 public static final int OSMANYA = 50; /* Osma */ 267 /** 268 * Shavian 269 * Script in Unicode 4 270 * 271 */ 272 public static final int SHAVIAN = 51; /* Shaw */ 273 /** 274 * Tai Le 275 * Script in Unicode 4 276 * 277 */ 278 public static final int TAI_LE = 52; /* Tale */ 279 /** 280 * Ugaritic 281 * Script in Unicode 4 282 * 283 */ 284 public static final int UGARITIC = 53; /* Ugar */ 285 /** 286 * Script in Unicode 4.0.1 287 */ 288 public static final int KATAKANA_OR_HIRAGANA = 54; /*Hrkt */ 289 290 /** 291 * Script in Unicode 4.1 292 */ 293 public static final int BUGINESE = 55; /* Bugi */ 294 /** 295 * Script in Unicode 4.1 296 */ 297 public static final int GLAGOLITIC = 56; /* Glag */ 298 /** 299 * Script in Unicode 4.1 300 */ 301 public static final int KHAROSHTHI = 57; /* Khar */ 302 /** 303 * Script in Unicode 4.1 304 */ 305 public static final int SYLOTI_NAGRI = 58; /* Sylo */ 306 /** 307 * Script in Unicode 4.1 308 */ 309 public static final int NEW_TAI_LUE = 59; /* Talu */ 310 /** 311 * Script in Unicode 4.1 312 */ 313 public static final int TIFINAGH = 60; /* Tfng */ 314 /** 315 * Script in Unicode 4.1 316 */ 317 public static final int OLD_PERSIAN = 61; /* Xpeo */ 318 319 320 /** 321 * ISO 15924 script code 322 */ 323 public static final int BALINESE = 62; /* Bali */ 324 /** 325 * ISO 15924 script code 326 */ 327 public static final int BATAK = 63; /* Batk */ 328 /** 329 * ISO 15924 script code 330 */ 331 public static final int BLISSYMBOLS = 64; /* Blis */ 332 /** 333 * ISO 15924 script code 334 */ 335 public static final int BRAHMI = 65; /* Brah */ 336 /** 337 * ISO 15924 script code 338 */ 339 public static final int CHAM = 66; /* Cham */ 340 /** 341 * ISO 15924 script code 342 */ 343 public static final int CIRTH = 67; /* Cirt */ 344 /** 345 * ISO 15924 script code 346 */ 347 public static final int OLD_CHURCH_SLAVONIC_CYRILLIC = 68; /* Cyrs */ 348 /** 349 * ISO 15924 script code 350 */ 351 public static final int DEMOTIC_EGYPTIAN = 69; /* Egyd */ 352 /** 353 * ISO 15924 script code 354 */ 355 public static final int HIERATIC_EGYPTIAN = 70; /* Egyh */ 356 /** 357 * ISO 15924 script code 358 */ 359 public static final int EGYPTIAN_HIEROGLYPHS = 71; /* Egyp */ 360 /** 361 * ISO 15924 script code 362 */ 363 public static final int KHUTSURI = 72; /* Geok */ 364 /** 365 * ISO 15924 script code 366 */ 367 public static final int SIMPLIFIED_HAN = 73; /* Hans */ 368 /** 369 * ISO 15924 script code 370 */ 371 public static final int TRADITIONAL_HAN = 74; /* Hant */ 372 /** 373 * ISO 15924 script code 374 */ 375 public static final int PAHAWH_HMONG = 75; /* Hmng */ 376 /** 377 * ISO 15924 script code 378 */ 379 public static final int OLD_HUNGARIAN = 76; /* Hung */ 380 /** 381 * ISO 15924 script code 382 */ 383 public static final int HARAPPAN_INDUS = 77; /* Inds */ 384 /** 385 * ISO 15924 script code 386 */ 387 public static final int JAVANESE = 78; /* Java */ 388 /** 389 * ISO 15924 script code 390 */ 391 public static final int KAYAH_LI = 79; /* Kali */ 392 /** 393 * ISO 15924 script code 394 */ 395 public static final int LATIN_FRAKTUR = 80; /* Latf */ 396 /** 397 * ISO 15924 script code 398 */ 399 public static final int LATIN_GAELIC = 81; /* Latg */ 400 /** 401 * ISO 15924 script code 402 */ 403 public static final int LEPCHA = 82; /* Lepc */ 404 /** 405 * ISO 15924 script code 406 */ 407 public static final int LINEAR_A = 83; /* Lina */ 408 /** 409 * ISO 15924 script code 410 */ 411 public static final int MANDAIC = 84; /* Mand */ 412 /** 413 * ISO 15924 script code 414 */ 415 public static final int MANDAEAN = MANDAIC; 416 /** 417 * ISO 15924 script code 418 */ 419 public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */ 420 /** 421 * ISO 15924 script code 422 */ 423 public static final int MEROITIC_HIEROGLYPHS = 86; /* Mero */ 424 /** 425 * ISO 15924 script code 426 */ 427 public static final int MEROITIC = MEROITIC_HIEROGLYPHS; 428 /** 429 * ISO 15924 script code 430 */ 431 public static final int NKO = 87; /* Nkoo */ 432 /** 433 * ISO 15924 script code 434 */ 435 public static final int ORKHON = 88; /* Orkh */ 436 /** 437 * ISO 15924 script code 438 */ 439 public static final int OLD_PERMIC = 89; /* Perm */ 440 /** 441 * ISO 15924 script code 442 */ 443 public static final int PHAGS_PA = 90; /* Phag */ 444 /** 445 * ISO 15924 script code 446 */ 447 public static final int PHOENICIAN = 91; /* Phnx */ 448 /** 449 * ISO 15924 script code 450 */ 451 public static final int MIAO = 92; /* Plrd */ 452 /** 453 * ISO 15924 script code 454 */ 455 public static final int PHONETIC_POLLARD = MIAO; 456 /** 457 * ISO 15924 script code 458 */ 459 public static final int RONGORONGO = 93; /* Roro */ 460 /** 461 * ISO 15924 script code 462 */ 463 public static final int SARATI = 94; /* Sara */ 464 /** 465 * ISO 15924 script code 466 */ 467 public static final int ESTRANGELO_SYRIAC = 95; /* Syre */ 468 /** 469 * ISO 15924 script code 470 */ 471 public static final int WESTERN_SYRIAC = 96; /* Syrj */ 472 /** 473 * ISO 15924 script code 474 */ 475 public static final int EASTERN_SYRIAC = 97; /* Syrn */ 476 /** 477 * ISO 15924 script code 478 */ 479 public static final int TENGWAR = 98; /* Teng */ 480 /** 481 * ISO 15924 script code 482 */ 483 public static final int VAI = 99; /* Vaii */ 484 /** 485 * ISO 15924 script code 486 */ 487 public static final int VISIBLE_SPEECH = 100;/* Visp */ 488 /** 489 * ISO 15924 script code 490 */ 491 public static final int CUNEIFORM = 101;/* Xsux */ 492 /** 493 * ISO 15924 script code 494 */ 495 public static final int UNWRITTEN_LANGUAGES = 102;/* Zxxx */ 496 /** 497 * ISO 15924 script code 498 */ 499 public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ 500 501 /** 502 * ISO 15924 script code 503 */ 504 public static final int CARIAN = 104;/* Cari */ 505 /** 506 * ISO 15924 script code 507 */ 508 public static final int JAPANESE = 105;/* Jpan */ 509 /** 510 * ISO 15924 script code 511 */ 512 public static final int LANNA = 106;/* Lana */ 513 /** 514 * ISO 15924 script code 515 */ 516 public static final int LYCIAN = 107;/* Lyci */ 517 /** 518 * ISO 15924 script code 519 */ 520 public static final int LYDIAN = 108;/* Lydi */ 521 /** 522 * ISO 15924 script code 523 */ 524 public static final int OL_CHIKI = 109;/* Olck */ 525 /** 526 * ISO 15924 script code 527 */ 528 public static final int REJANG = 110;/* Rjng */ 529 /** 530 * ISO 15924 script code 531 */ 532 public static final int SAURASHTRA = 111;/* Saur */ 533 /** 534 * ISO 15924 script code for Sutton SignWriting 535 */ 536 public static final int SIGN_WRITING = 112;/* Sgnw */ 537 /** 538 * ISO 15924 script code 539 */ 540 public static final int SUNDANESE = 113;/* Sund */ 541 /** 542 * ISO 15924 script code 543 */ 544 public static final int MOON = 114;/* Moon */ 545 /** 546 * ISO 15924 script code 547 */ 548 public static final int MEITEI_MAYEK = 115;/* Mtei */ 549 550 /** 551 * ISO 15924 script code 552 */ 553 public static final int IMPERIAL_ARAMAIC = 116;/* Armi */ 554 555 /** 556 * ISO 15924 script code 557 */ 558 public static final int AVESTAN = 117;/* Avst */ 559 560 /** 561 * ISO 15924 script code 562 */ 563 public static final int CHAKMA = 118;/* Cakm */ 564 565 /** 566 * ISO 15924 script code 567 */ 568 public static final int KOREAN = 119;/* Kore */ 569 570 /** 571 * ISO 15924 script code 572 */ 573 public static final int KAITHI = 120;/* Kthi */ 574 575 /** 576 * ISO 15924 script code 577 */ 578 public static final int MANICHAEAN = 121;/* Mani */ 579 580 /** 581 * ISO 15924 script code 582 */ 583 public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */ 584 585 /** 586 * ISO 15924 script code 587 */ 588 public static final int PSALTER_PAHLAVI = 123;/* Phlp */ 589 590 /** 591 * ISO 15924 script code 592 */ 593 public static final int BOOK_PAHLAVI = 124;/* Phlv */ 594 595 /** 596 * ISO 15924 script code 597 */ 598 public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */ 599 600 /** 601 * ISO 15924 script code 602 */ 603 public static final int SAMARITAN = 126;/* Samr */ 604 605 /** 606 * ISO 15924 script code 607 */ 608 public static final int TAI_VIET = 127;/* Tavt */ 609 610 /** 611 * ISO 15924 script code 612 */ 613 public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */ 614 615 /** 616 * ISO 15924 script code 617 */ 618 public static final int SYMBOLS = 129;/* Zsym */ 619 620 /** 621 * ISO 15924 script code 622 */ 623 public static final int BAMUM = 130;/* Bamu */ 624 /** 625 * ISO 15924 script code 626 */ 627 public static final int LISU = 131;/* Lisu */ 628 /** 629 * ISO 15924 script code 630 */ 631 public static final int NAKHI_GEBA = 132;/* Nkgb */ 632 /** 633 * ISO 15924 script code 634 */ 635 public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */ 636 637 /** 638 * ISO 15924 script code 639 */ 640 public static final int BASSA_VAH = 134;/* Bass */ 641 /** 642 * ISO 15924 script code 643 */ 644 public static final int DUPLOYAN = 135;/* Dupl */ 645 /** 646 * Typo, use DUPLOYAN 647 * @deprecated ICU 54 648 * @hide deprecated on icu4j-org 649 */ 650 @Deprecated 651 public static final int DUPLOYAN_SHORTAND = DUPLOYAN; 652 /** 653 * ISO 15924 script code 654 */ 655 public static final int ELBASAN = 136;/* Elba */ 656 /** 657 * ISO 15924 script code 658 */ 659 public static final int GRANTHA = 137;/* Gran */ 660 /** 661 * ISO 15924 script code 662 */ 663 public static final int KPELLE = 138;/* Kpel */ 664 /** 665 * ISO 15924 script code 666 */ 667 public static final int LOMA = 139;/* Loma */ 668 /** 669 * Mende Kikakui 670 * ISO 15924 script code 671 */ 672 public static final int MENDE = 140;/* Mend */ 673 /** 674 * ISO 15924 script code 675 */ 676 public static final int MEROITIC_CURSIVE = 141;/* Merc */ 677 /** 678 * ISO 15924 script code 679 */ 680 public static final int OLD_NORTH_ARABIAN = 142;/* Narb */ 681 /** 682 * ISO 15924 script code 683 */ 684 public static final int NABATAEAN = 143;/* Nbat */ 685 /** 686 * ISO 15924 script code 687 */ 688 public static final int PALMYRENE = 144;/* Palm */ 689 /** 690 * ISO 15924 script code 691 */ 692 public static final int KHUDAWADI = 145;/* Sind */ 693 /** 694 * ISO 15924 script code 695 */ 696 public static final int SINDHI = KHUDAWADI; 697 /** 698 * ISO 15924 script code 699 */ 700 public static final int WARANG_CITI = 146;/* Wara */ 701 702 /** 703 * ISO 15924 script code 704 */ 705 public static final int AFAKA = 147;/* Afak */ 706 /** 707 * ISO 15924 script code 708 */ 709 public static final int JURCHEN = 148;/* Jurc */ 710 /** 711 * ISO 15924 script code 712 */ 713 public static final int MRO = 149;/* Mroo */ 714 /** 715 * ISO 15924 script code 716 */ 717 public static final int NUSHU = 150;/* Nshu */ 718 /** 719 * ISO 15924 script code 720 */ 721 public static final int SHARADA = 151;/* Shrd */ 722 /** 723 * ISO 15924 script code 724 */ 725 public static final int SORA_SOMPENG = 152;/* Sora */ 726 /** 727 * ISO 15924 script code 728 */ 729 public static final int TAKRI = 153;/* Takr */ 730 /** 731 * ISO 15924 script code 732 */ 733 public static final int TANGUT = 154;/* Tang */ 734 /** 735 * ISO 15924 script code 736 */ 737 public static final int WOLEAI = 155;/* Wole */ 738 739 /** 740 * ISO 15924 script code 741 */ 742 public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */ 743 /** 744 * ISO 15924 script code 745 */ 746 public static final int KHOJKI = 157;/* Khoj */ 747 /** 748 * ISO 15924 script code 749 */ 750 public static final int TIRHUTA = 158;/* Tirh */ 751 /** 752 * ISO 15924 script code 753 */ 754 public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */ 755 /** 756 * ISO 15924 script code 757 */ 758 public static final int MAHAJANI = 160; /* Mahj */ 759 760 /** 761 * ISO 15924 script code 762 */ 763 public static final int AHOM = 161; /* Ahom */ 764 /** 765 * ISO 15924 script code 766 */ 767 public static final int HATRAN = 162; /* Hatr */ 768 /** 769 * ISO 15924 script code 770 */ 771 public static final int MODI = 163; /* Modi */ 772 /** 773 * ISO 15924 script code 774 */ 775 public static final int MULTANI = 164; /* Mult */ 776 /** 777 * ISO 15924 script code 778 */ 779 public static final int PAU_CIN_HAU = 165; /* Pauc */ 780 /** 781 * ISO 15924 script code 782 */ 783 public static final int SIDDHAM = 166; /* Sidd */ 784 785 /** 786 * ISO 15924 script code 787 */ 788 public static final int ADLAM = 167; /* Adlm */ 789 /** 790 * ISO 15924 script code 791 */ 792 public static final int BHAIKSUKI = 168; /* Bhks */ 793 /** 794 * ISO 15924 script code 795 */ 796 public static final int MARCHEN = 169; /* Marc */ 797 /** 798 * ISO 15924 script code 799 */ 800 public static final int NEWA = 170; /* Newa */ 801 /** 802 * ISO 15924 script code 803 */ 804 public static final int OSAGE = 171; /* Osge */ 805 806 /** 807 * ISO 15924 script code 808 */ 809 public static final int HAN_WITH_BOPOMOFO = 172; /* Hanb */ 810 /** 811 * ISO 15924 script code 812 */ 813 public static final int JAMO = 173; /* Jamo */ 814 /** 815 * ISO 15924 script code 816 */ 817 public static final int SYMBOLS_EMOJI = 174; /* Zsye */ 818 819 /** 820 * ISO 15924 script code 821 */ 822 public static final int MASARAM_GONDI = 175; /* Gonm */ 823 /** 824 * ISO 15924 script code 825 */ 826 public static final int SOYOMBO = 176; /* Soyo */ 827 /** 828 * ISO 15924 script code 829 */ 830 public static final int ZANABAZAR_SQUARE = 177; /* Zanb */ 831 832 /** 833 * ISO 15924 script code 834 */ 835 public static final int DOGRA = 178; /* Dogr */ 836 /***/ 837 public static final int GUNJALA_GONDI = 179; /* Gong */ 838 /***/ 839 public static final int MAKASAR = 180; /* Maka */ 840 /***/ 841 public static final int MEDEFAIDRIN = 181; /* Medf */ 842 /***/ 843 public static final int HANIFI_ROHINGYA = 182; /* Rohg */ 844 /***/ 845 public static final int SOGDIAN = 183; /* Sogd */ 846 /***/ 847 public static final int OLD_SOGDIAN = 184; /* Sogo */ 848 849 /***/ 850 public static final int ELYMAIC = 185; /* Elym */ 851 /***/ 852 public static final int NYIAKENG_PUACHUE_HMONG = 186; /* Hmnp */ 853 /***/ 854 public static final int NANDINAGARI = 187; /* Nand */ 855 /***/ 856 public static final int WANCHO = 188; /* Wcho */ 857 858 /***/ 859 public static final int CHORASMIAN = 189; /* Chrs */ 860 /***/ 861 public static final int DIVES_AKURU = 190; /* Diak */ 862 /***/ 863 public static final int KHITAN_SMALL_SCRIPT = 191; /* Kits */ 864 /***/ 865 public static final int YEZIDI = 192; /* Yezi */ 866 867 /** 868 * One more than the highest normal UScript code. 869 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT). 870 * 871 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 872 * @hide unsupported on OHOS 873 */ 874 @Deprecated 875 public static final int CODE_LIMIT = 193; 876 getCodesFromLocale(ULocale locale)877 private static int[] getCodesFromLocale(ULocale locale) { 878 // Multi-script languages, equivalent to the LocaleScript data 879 // that we used to load from locale resource bundles. 880 String lang = locale.getLanguage(); 881 if(lang.equals("ja")) { 882 return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN }; 883 } 884 if(lang.equals("ko")) { 885 return new int[] { UScript.HANGUL, UScript.HAN }; 886 } 887 String script = locale.getScript(); 888 if(lang.equals("zh") && script.equals("Hant")) { 889 return new int[] { UScript.HAN, UScript.BOPOMOFO }; 890 } 891 // Explicit script code. 892 if(script.length() != 0) { 893 if (lang.equals("en") && script.equals("Qaag")) { 894 return new int[] { UScript.LATIN }; 895 } 896 // In Burmese, the script is Zawgyi 897 if (lang.equals("my") && script.equals("Qaag")) { 898 return new int[] { UScript.MYANMAR }; 899 } 900 int scriptCode = UScript.getCodeFromName(script); 901 if(scriptCode != UScript.INVALID_CODE) { 902 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) { 903 scriptCode = UScript.HAN; 904 } 905 return new int[] { scriptCode }; 906 } 907 } 908 return null; 909 } 910 911 /** 912 * Helper function to find the code from locale. 913 * @param locale The locale. 914 */ findCodeFromLocale(ULocale locale)915 private static int[] findCodeFromLocale(ULocale locale) { 916 int[] result = getCodesFromLocale(locale); 917 if(result != null) { 918 return result; 919 } 920 ULocale likely = ULocale.addLikelySubtags(locale); 921 return getCodesFromLocale(likely); 922 } 923 924 /** 925 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 926 * Returns MALAYAM given "Malayam" OR "Mlym". 927 * Returns LATIN given "en" OR "en_US" 928 * @param locale Locale 929 * @return The script codes array. null if the the code cannot be found. 930 */ getCode(Locale locale)931 public static final int[] getCode(Locale locale){ 932 return findCodeFromLocale(ULocale.forLocale(locale)); 933 } 934 /** 935 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 936 * Returns MALAYAM given "Malayam" OR "Mlym". 937 * Returns LATIN given "en" OR "en_US" 938 * @param locale ULocale 939 * @return The script codes array. null if the the code cannot be found. 940 */ getCode(ULocale locale)941 public static final int[] getCode(ULocale locale){ 942 return findCodeFromLocale(locale); 943 } 944 /** 945 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. 946 * Returns MALAYAM given "Malayam" OR "Mlym". 947 * Returns LATIN given "en" OR "en_US" 948 * 949 * <p>Note: To search by short or long script alias only, use 950 * {@link #getCodeFromName(String)} instead. 951 * That does a fast lookup with no access of the locale data. 952 * 953 * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale 954 * @return The script codes array. null if the the code cannot be found. 955 */ getCode(String nameOrAbbrOrLocale)956 public static final int[] getCode(String nameOrAbbrOrLocale) { 957 boolean triedCode = false; 958 if (nameOrAbbrOrLocale.indexOf('_') < 0 && nameOrAbbrOrLocale.indexOf('-') < 0) { 959 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 960 if (propNum != UProperty.UNDEFINED) { 961 return new int[] {propNum}; 962 } 963 triedCode = true; 964 } 965 int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale)); 966 if (scripts != null) { 967 return scripts; 968 } 969 if (!triedCode) { 970 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 971 if (propNum != UProperty.UNDEFINED) { 972 return new int[] {propNum}; 973 } 974 } 975 return null; 976 } 977 978 /** 979 * Returns the script code associated with the given Unicode script property alias 980 * (name or abbreviation). 981 * Short aliases are ISO 15924 script codes. 982 * Returns MALAYAM given "Malayam" OR "Mlym". 983 * 984 * @param nameOrAbbr name of the script or ISO 15924 code 985 * @return The script code value, or INVALID_CODE if the code cannot be found. 986 */ getCodeFromName(String nameOrAbbr)987 public static final int getCodeFromName(String nameOrAbbr) { 988 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr); 989 return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum; 990 } 991 992 /** 993 * Gets the script code associated with the given codepoint. 994 * Returns UScript.MALAYAM given 0x0D02 995 * @param codepoint UChar32 codepoint 996 * @return The script code 997 */ getScript(int codepoint)998 public static final int getScript(int codepoint){ 999 if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) { 1000 int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK; 1001 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1002 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1003 return codeOrIndex; 1004 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) { 1005 return UScript.COMMON; 1006 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1007 return UScript.INHERITED; 1008 } else { 1009 return UCharacterProperty.INSTANCE.m_scriptExtensions_[codeOrIndex]; 1010 } 1011 }else{ 1012 throw new IllegalArgumentException(Integer.toString(codepoint)); 1013 } 1014 } 1015 1016 /** 1017 * Do the Script_Extensions of code point c contain script sc? 1018 * If c does not have explicit Script_Extensions, then this tests whether 1019 * c has the Script property value sc. 1020 * 1021 * <p>Some characters are commonly used in multiple scripts. 1022 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 1023 * 1024 * @param c code point 1025 * @param sc script code 1026 * @return true if sc is in Script_Extensions(c) 1027 */ hasScript(int c, int sc)1028 public static final boolean hasScript(int c, int sc) { 1029 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1030 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1031 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1032 return sc==codeOrIndex; 1033 } 1034 1035 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1036 int scx=codeOrIndex; // index into scriptExtensions 1037 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1038 scx=scriptExtensions[scx+1]; 1039 } 1040 if(sc>0x7fff) { 1041 // Guard against bogus input that would 1042 // make us go past the Script_Extensions terminator. 1043 return false; 1044 } 1045 while(sc>scriptExtensions[scx]) { 1046 ++scx; 1047 } 1048 return sc==(scriptExtensions[scx]&0x7fff); 1049 } 1050 1051 /** 1052 * Sets code point c's Script_Extensions as script code integers into the output BitSet. 1053 * <ul> 1054 * <li>If c does have Script_Extensions, then the return value is 1055 * the negative number of Script_Extensions codes (= -set.cardinality()); 1056 * in this case, the Script property value 1057 * (normally Common or Inherited) is not included in the set. 1058 * <li>If c does not have Script_Extensions, then the one Script code is put into the set 1059 * and also returned. 1060 * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set 1061 * and also returned. 1062 * </ul> 1063 * In other words, if the return value is non-negative, it is c's single Script code 1064 * and the set contains exactly this Script code. 1065 * If the return value is -n, then the set contains c's n>=2 Script_Extensions script codes. 1066 * 1067 * <p>Some characters are commonly used in multiple scripts. 1068 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 1069 * 1070 * @param c code point 1071 * @param set set of script code integers; will be cleared, then bits are set 1072 * corresponding to c's Script_Extensions 1073 * @return negative number of script codes in c's Script_Extensions, 1074 * or the non-negative single Script value 1075 */ getScriptExtensions(int c, BitSet set)1076 public static final int getScriptExtensions(int c, BitSet set) { 1077 set.clear(); 1078 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1079 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1080 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1081 set.set(codeOrIndex); 1082 return codeOrIndex; 1083 } 1084 1085 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1086 int scx=codeOrIndex; // index into scriptExtensions 1087 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1088 scx=scriptExtensions[scx+1]; 1089 } 1090 int length=0; 1091 int sx; 1092 do { 1093 sx=scriptExtensions[scx++]; 1094 set.set(sx&0x7fff); 1095 ++length; 1096 } while(sx<0x8000); 1097 // length==set.cardinality() 1098 return -length; 1099 } 1100 1101 /** 1102 * Returns the long Unicode script name, if there is one. 1103 * Otherwise returns the 4-letter ISO 15924 script code. 1104 * Returns "Malayam" given MALAYALAM. 1105 * 1106 * @param scriptCode int script code 1107 * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code 1108 * @throws IllegalArgumentException if the script code is not valid 1109 */ getName(int scriptCode)1110 public static final String getName(int scriptCode){ 1111 return UCharacter.getPropertyValueName(UProperty.SCRIPT, 1112 scriptCode, 1113 UProperty.NameChoice.LONG); 1114 } 1115 1116 /** 1117 * Returns the 4-letter ISO 15924 script code, 1118 * which is the same as the short Unicode script name if Unicode has names for the script. 1119 * Returns "Mlym" given MALAYALAM. 1120 * 1121 * @param scriptCode int script code 1122 * @return short script name (4-letter code) 1123 * @throws IllegalArgumentException if the script code is not valid 1124 */ getShortName(int scriptCode)1125 public static final String getShortName(int scriptCode){ 1126 return UCharacter.getPropertyValueName(UProperty.SCRIPT, 1127 scriptCode, 1128 UProperty.NameChoice.SHORT); 1129 } 1130 1131 /** 1132 * Script metadata (script properties). 1133 * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 1134 */ 1135 private static final class ScriptMetadata { 1136 // 0 = NOT_ENCODED, no sample character, default false script properties. 1137 // Bits 20.. 0: sample character 1138 1139 // Bits 23..21: usage 1140 private static final int UNKNOWN = 1 << 21; 1141 private static final int EXCLUSION = 2 << 21; 1142 private static final int LIMITED_USE = 3 << 21; 1143 // vate static final int ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 1144 private static final int RECOMMENDED = 5 << 21; 1145 1146 // Bits 31..24: Single-bit flags 1147 private static final int RTL = 1 << 24; 1148 private static final int LB_LETTERS = 1 << 25; 1149 private static final int CASED = 1 << 26; 1150 1151 private static final int SCRIPT_PROPS[] = { 1152 // Begin copy-paste output from 1153 // tools/trunk/unicode/py/parsescriptmetadata.py 1154 // or from icu/trunk/source/common/uscript_props.cpp 1155 0x0040 | RECOMMENDED, // Zyyy 1156 0x0308 | RECOMMENDED, // Zinh 1157 0x0628 | RECOMMENDED | RTL, // Arab 1158 0x0531 | RECOMMENDED | CASED, // Armn 1159 0x0995 | RECOMMENDED, // Beng 1160 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 1161 0x13C4 | LIMITED_USE | CASED, // Cher 1162 0x03E2 | EXCLUSION | CASED, // Copt 1163 0x042F | RECOMMENDED | CASED, // Cyrl 1164 0x10414 | EXCLUSION | CASED, // Dsrt 1165 0x0905 | RECOMMENDED, // Deva 1166 0x12A0 | RECOMMENDED, // Ethi 1167 0x10D3 | RECOMMENDED, // Geor 1168 0x10330 | EXCLUSION, // Goth 1169 0x03A9 | RECOMMENDED | CASED, // Grek 1170 0x0A95 | RECOMMENDED, // Gujr 1171 0x0A15 | RECOMMENDED, // Guru 1172 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 1173 0xAC00 | RECOMMENDED, // Hang 1174 0x05D0 | RECOMMENDED | RTL, // Hebr 1175 0x304B | RECOMMENDED | LB_LETTERS, // Hira 1176 0x0C95 | RECOMMENDED, // Knda 1177 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 1178 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 1179 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 1180 0x004C | RECOMMENDED | CASED, // Latn 1181 0x0D15 | RECOMMENDED, // Mlym 1182 0x1826 | EXCLUSION, // Mong 1183 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 1184 0x168F | EXCLUSION, // Ogam 1185 0x10300 | EXCLUSION, // Ital 1186 0x0B15 | RECOMMENDED, // Orya 1187 0x16A0 | EXCLUSION, // Runr 1188 0x0D85 | RECOMMENDED, // Sinh 1189 0x0710 | LIMITED_USE | RTL, // Syrc 1190 0x0B95 | RECOMMENDED, // Taml 1191 0x0C15 | RECOMMENDED, // Telu 1192 0x078C | RECOMMENDED | RTL, // Thaa 1193 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 1194 0x0F40 | RECOMMENDED, // Tibt 1195 0x14C0 | LIMITED_USE, // Cans 1196 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii 1197 0x1703 | EXCLUSION, // Tglg 1198 0x1723 | EXCLUSION, // Hano 1199 0x1743 | EXCLUSION, // Buhd 1200 0x1763 | EXCLUSION, // Tagb 1201 0x280E | UNKNOWN, // Brai 1202 0x10800 | EXCLUSION | RTL, // Cprt 1203 0x1900 | LIMITED_USE, // Limb 1204 0x10000 | EXCLUSION, // Linb 1205 0x10480 | EXCLUSION, // Osma 1206 0x10450 | EXCLUSION, // Shaw 1207 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 1208 0x10380 | EXCLUSION, // Ugar 1209 0, 1210 0x1A00 | EXCLUSION, // Bugi 1211 0x2C00 | EXCLUSION | CASED, // Glag 1212 0x10A00 | EXCLUSION | RTL, // Khar 1213 0xA800 | LIMITED_USE, // Sylo 1214 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 1215 0x2D30 | LIMITED_USE, // Tfng 1216 0x103A0 | EXCLUSION, // Xpeo 1217 0x1B05 | LIMITED_USE, // Bali 1218 0x1BC0 | LIMITED_USE, // Batk 1219 0, 1220 0x11005 | EXCLUSION, // Brah 1221 0xAA00 | LIMITED_USE, // Cham 1222 0, 1223 0, 1224 0, 1225 0, 1226 0x13153 | EXCLUSION, // Egyp 1227 0, 1228 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 1229 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 1230 0x16B1C | EXCLUSION, // Hmng 1231 0x10CA1 | EXCLUSION | RTL | CASED, // Hung 1232 0, 1233 0xA984 | LIMITED_USE, // Java 1234 0xA90A | LIMITED_USE, // Kali 1235 0, 1236 0, 1237 0x1C00 | LIMITED_USE, // Lepc 1238 0x10647 | EXCLUSION, // Lina 1239 0x0840 | LIMITED_USE | RTL, // Mand 1240 0, 1241 0x10980 | EXCLUSION | RTL, // Mero 1242 0x07CA | LIMITED_USE | RTL, // Nkoo 1243 0x10C00 | EXCLUSION | RTL, // Orkh 1244 0x1036B | EXCLUSION, // Perm 1245 0xA840 | EXCLUSION, // Phag 1246 0x10900 | EXCLUSION | RTL, // Phnx 1247 0x16F00 | LIMITED_USE, // Plrd 1248 0, 1249 0, 1250 0, 1251 0, 1252 0, 1253 0, 1254 0xA549 | LIMITED_USE, // Vaii 1255 0, 1256 0x12000 | EXCLUSION, // Xsux 1257 0, 1258 0xFDD0 | UNKNOWN, // Zzzz 1259 0x102A0 | EXCLUSION, // Cari 1260 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 1261 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 1262 0x10280 | EXCLUSION, // Lyci 1263 0x10920 | EXCLUSION | RTL, // Lydi 1264 0x1C5A | LIMITED_USE, // Olck 1265 0xA930 | EXCLUSION, // Rjng 1266 0xA882 | LIMITED_USE, // Saur 1267 0x1D850 | EXCLUSION, // Sgnw 1268 0x1B83 | LIMITED_USE, // Sund 1269 0, 1270 0xABC0 | LIMITED_USE, // Mtei 1271 0x10840 | EXCLUSION | RTL, // Armi 1272 0x10B00 | EXCLUSION | RTL, // Avst 1273 0x11103 | LIMITED_USE, // Cakm 1274 0xAC00 | RECOMMENDED, // Kore 1275 0x11083 | EXCLUSION, // Kthi 1276 0x10AD8 | EXCLUSION | RTL, // Mani 1277 0x10B60 | EXCLUSION | RTL, // Phli 1278 0x10B8F | EXCLUSION | RTL, // Phlp 1279 0, 1280 0x10B40 | EXCLUSION | RTL, // Prti 1281 0x0800 | EXCLUSION | RTL, // Samr 1282 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 1283 0, 1284 0, 1285 0xA6A0 | LIMITED_USE, // Bamu 1286 0xA4D0 | LIMITED_USE, // Lisu 1287 0, 1288 0x10A60 | EXCLUSION | RTL, // Sarb 1289 0x16AE6 | EXCLUSION, // Bass 1290 0x1BC20 | EXCLUSION, // Dupl 1291 0x10500 | EXCLUSION, // Elba 1292 0x11315 | EXCLUSION, // Gran 1293 0, 1294 0, 1295 0x1E802 | EXCLUSION | RTL, // Mend 1296 0x109A0 | EXCLUSION | RTL, // Merc 1297 0x10A95 | EXCLUSION | RTL, // Narb 1298 0x10896 | EXCLUSION | RTL, // Nbat 1299 0x10873 | EXCLUSION | RTL, // Palm 1300 0x112BE | EXCLUSION, // Sind 1301 0x118B4 | EXCLUSION | CASED, // Wara 1302 0, 1303 0, 1304 0x16A4F | EXCLUSION, // Mroo 1305 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu 1306 0x11183 | EXCLUSION, // Shrd 1307 0x110D0 | EXCLUSION, // Sora 1308 0x11680 | EXCLUSION, // Takr 1309 0x18229 | EXCLUSION | LB_LETTERS, // Tang 1310 0, 1311 0x14400 | EXCLUSION, // Hluw 1312 0x11208 | EXCLUSION, // Khoj 1313 0x11484 | EXCLUSION, // Tirh 1314 0x10537 | EXCLUSION, // Aghb 1315 0x11152 | EXCLUSION, // Mahj 1316 0x11717 | EXCLUSION | LB_LETTERS, // Ahom 1317 0x108F4 | EXCLUSION | RTL, // Hatr 1318 0x1160E | EXCLUSION, // Modi 1319 0x1128F | EXCLUSION, // Mult 1320 0x11AC0 | EXCLUSION, // Pauc 1321 0x1158E | EXCLUSION, // Sidd 1322 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm 1323 0x11C0E | EXCLUSION, // Bhks 1324 0x11C72 | EXCLUSION, // Marc 1325 0x11412 | LIMITED_USE, // Newa 1326 0x104B5 | LIMITED_USE | CASED, // Osge 1327 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb 1328 0x1112 | RECOMMENDED, // Jamo 1329 0, 1330 0x11D10 | EXCLUSION, // Gonm 1331 0x11A5C | EXCLUSION, // Soyo 1332 0x11A0B | EXCLUSION, // Zanb 1333 0x1180B | EXCLUSION, // Dogr 1334 0x11D71 | LIMITED_USE, // Gong 1335 0x11EE5 | EXCLUSION, // Maka 1336 0x16E40 | EXCLUSION | CASED, // Medf 1337 0x10D12 | LIMITED_USE | RTL, // Rohg 1338 0x10F42 | EXCLUSION | RTL, // Sogd 1339 0x10F19 | EXCLUSION | RTL, // Sogo 1340 0x10FF1 | EXCLUSION | RTL, // Elym 1341 0x1E108 | LIMITED_USE, // Hmnp 1342 0x119CE | EXCLUSION, // Nand 1343 0x1E2E1 | LIMITED_USE, // Wcho 1344 0x10FBF | EXCLUSION | RTL, // Chrs 1345 0x1190C | EXCLUSION, // Diak 1346 0x18C65 | EXCLUSION | LB_LETTERS, // Kits 1347 0x10E88 | EXCLUSION | RTL, // Yezi 1348 // End copy-paste from parsescriptmetadata.py 1349 }; 1350 getScriptProps(int script)1351 private static final int getScriptProps(int script) { 1352 if (0 <= script && script < SCRIPT_PROPS.length) { 1353 return SCRIPT_PROPS[script]; 1354 } else { 1355 return 0; 1356 } 1357 } 1358 } 1359 1360 /** 1361 * Script usage constants. 1362 * See UAX #31 Unicode Identifier and Pattern Syntax. 1363 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers 1364 */ 1365 public enum ScriptUsage { 1366 /** 1367 * Not encoded in Unicode. 1368 */ 1369 NOT_ENCODED, 1370 /** 1371 * Unknown script usage. 1372 */ 1373 UNKNOWN, 1374 /** 1375 * Candidate for Exclusion from Identifiers. 1376 */ 1377 EXCLUDED, 1378 /** 1379 * Limited Use script. 1380 */ 1381 LIMITED_USE, 1382 /** 1383 * Aspirational Use script. 1384 */ 1385 ASPIRATIONAL, 1386 /** 1387 * Recommended script. 1388 */ 1389 RECOMMENDED 1390 } 1391 private static final ScriptUsage[] usageValues = ScriptUsage.values(); 1392 1393 /** 1394 * Returns the script sample character string. 1395 * This string normally consists of one code point but might be longer. 1396 * The string is empty if the script is not encoded. 1397 * 1398 * @param script script code 1399 * @return the sample character string 1400 */ getSampleString(int script)1401 public static final String getSampleString(int script) { 1402 int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff; 1403 if(sampleChar != 0) { 1404 return new StringBuilder().appendCodePoint(sampleChar).toString(); 1405 } 1406 return ""; 1407 } 1408 1409 /** 1410 * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. 1411 * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode. 1412 * 1413 * @param script script code 1414 * @return script usage 1415 * @see ScriptUsage 1416 */ getUsage(int script)1417 public static final ScriptUsage getUsage(int script) { 1418 return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7]; 1419 } 1420 1421 /** 1422 * Returns true if the script is written right-to-left. 1423 * For example, Arab and Hebr. 1424 * 1425 * @param script script code 1426 * @return true if the script is right-to-left 1427 */ isRightToLeft(int script)1428 public static final boolean isRightToLeft(int script) { 1429 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0; 1430 } 1431 1432 /** 1433 * Returns true if the script allows line breaks between letters (excluding hyphenation). 1434 * Such a script typically requires dictionary-based line breaking. 1435 * For example, Hani and Thai. 1436 * 1437 * @param script script code 1438 * @return true if the script allows line breaks between letters 1439 */ breaksBetweenLetters(int script)1440 public static final boolean breaksBetweenLetters(int script) { 1441 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0; 1442 } 1443 1444 /** 1445 * Returns true if in modern (or most recent) usage of the script case distinctions are customary. 1446 * For example, Latn and Cyrl. 1447 * 1448 * @param script script code 1449 * @return true if the script is cased 1450 */ isCased(int script)1451 public static final boolean isCased(int script) { 1452 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0; 1453 } 1454 1455 ///CLOVER:OFF 1456 /** 1457 * Private Constructor. Never default construct 1458 */ UScript()1459 private UScript(){} 1460 ///CLOVER:ON 1461 } 1462