1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2001-2016 International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.util.BitSet; 13 import java.util.Locale; 14 15 import com.ibm.icu.impl.UCharacterProperty; 16 import com.ibm.icu.util.ULocale; 17 18 /** 19 * Constants for ISO 15924 script codes, and related functions. 20 * 21 * <p>The current set of script code constants supports at least all scripts 22 * that are encoded in the version of Unicode which ICU currently supports. 23 * The names of the constants are usually derived from the 24 * Unicode script property value aliases. 25 * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) 26 * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . 27 * 28 * <p>In addition, constants for many ISO 15924 script codes 29 * are included, for use with language tags, CLDR data, and similar. 30 * Some of those codes are not used in the Unicode Character Database (UCD). 31 * For example, there are no characters that have a UCD script property value of 32 * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. 33 * 34 * <p>Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. 35 * 36 * <p>Starting with ICU 55, script codes are only added when their scripts 37 * have been or will certainly be encoded in Unicode, 38 * and have been assigned Unicode script property value aliases, 39 * to ensure that their script names are stable and match the names of the constants. 40 * Script codes like Latf and Aran that are not subject to separate encoding 41 * may be added at any time. 
42 * 43 * @stable ICU 2.4 44 */ 45 public final class UScript { 46 /** 47 * Invalid code 48 * @stable ICU 2.4 49 */ 50 public static final int INVALID_CODE = -1; 51 /** 52 * Common 53 * @stable ICU 2.4 54 */ 55 public static final int COMMON = 0; /* Zyyy */ 56 /** 57 * Inherited 58 * @stable ICU 2.4 59 */ 60 public static final int INHERITED = 1; /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ 61 /** 62 * Arabic 63 * @stable ICU 2.4 64 */ 65 public static final int ARABIC = 2; /* Arab */ 66 /** 67 * Armenian 68 * @stable ICU 2.4 69 */ 70 public static final int ARMENIAN = 3; /* Armn */ 71 /** 72 * Bengali 73 * @stable ICU 2.4 74 */ 75 public static final int BENGALI = 4; /* Beng */ 76 /** 77 * Bopomofo 78 * @stable ICU 2.4 79 */ 80 public static final int BOPOMOFO = 5; /* Bopo */ 81 /** 82 * Cherokee 83 * @stable ICU 2.4 84 */ 85 public static final int CHEROKEE = 6; /* Cher */ 86 /** 87 * Coptic 88 * @stable ICU 2.4 89 */ 90 public static final int COPTIC = 7; /* Qaac */ 91 /** 92 * Cyrillic 93 * @stable ICU 2.4 94 */ 95 public static final int CYRILLIC = 8; /* Cyrl (Cyrs) */ 96 /** 97 * Deseret 98 * @stable ICU 2.4 99 */ 100 public static final int DESERET = 9; /* Dsrt */ 101 /** 102 * Devanagari 103 * @stable ICU 2.4 104 */ 105 public static final int DEVANAGARI = 10; /* Deva */ 106 /** 107 * Ethiopic 108 * @stable ICU 2.4 109 */ 110 public static final int ETHIOPIC = 11; /* Ethi */ 111 /** 112 * Georgian 113 * @stable ICU 2.4 114 */ 115 public static final int GEORGIAN = 12; /* Geor (Geon; Geoa) */ 116 /** 117 * Gothic 118 * @stable ICU 2.4 119 */ 120 public static final int GOTHIC = 13; /* Goth */ 121 /** 122 * Greek 123 * @stable ICU 2.4 124 */ 125 public static final int GREEK = 14; /* Grek */ 126 /** 127 * Gujarati 128 * @stable ICU 2.4 129 */ 130 public static final int GUJARATI = 15; /* Gujr */ 131 /** 132 * Gurmukhi 133 * @stable ICU 2.4 134 */ 135 public static final int GURMUKHI = 16; /* Guru */ 136 /** 137 * 
Han 138 * @stable ICU 2.4 139 */ 140 public static final int HAN = 17; /* Hani */ 141 /** 142 * Hangul 143 * @stable ICU 2.4 144 */ 145 public static final int HANGUL = 18; /* Hang */ 146 /** 147 * Hebrew 148 * @stable ICU 2.4 149 */ 150 public static final int HEBREW = 19; /* Hebr */ 151 /** 152 * Hiragana 153 * @stable ICU 2.4 154 */ 155 public static final int HIRAGANA = 20; /* Hira */ 156 /** 157 * Kannada 158 * @stable ICU 2.4 159 */ 160 public static final int KANNADA = 21; /* Knda */ 161 /** 162 * Katakana 163 * @stable ICU 2.4 164 */ 165 public static final int KATAKANA = 22; /* Kana */ 166 /** 167 * Khmer 168 * @stable ICU 2.4 169 */ 170 public static final int KHMER = 23; /* Khmr */ 171 /** 172 * Lao 173 * @stable ICU 2.4 174 */ 175 public static final int LAO = 24; /* Laoo */ 176 /** 177 * Latin 178 * @stable ICU 2.4 179 */ 180 public static final int LATIN = 25; /* Latn (Latf; Latg) */ 181 /** 182 * Malayalam 183 * @stable ICU 2.4 184 */ 185 public static final int MALAYALAM = 26; /* Mlym */ 186 /** 187 * Mongolian 188 * @stable ICU 2.4 189 */ 190 public static final int MONGOLIAN = 27; /* Mong */ 191 /** 192 * Myanmar 193 * @stable ICU 2.4 194 */ 195 public static final int MYANMAR = 28; /* Mymr */ 196 /** 197 * Ogham 198 * @stable ICU 2.4 199 */ 200 public static final int OGHAM = 29; /* Ogam */ 201 /** 202 * Old Italic 203 * @stable ICU 2.4 204 */ 205 public static final int OLD_ITALIC = 30; /* Ital */ 206 /** 207 * Oriya 208 * @stable ICU 2.4 209 */ 210 public static final int ORIYA = 31; /* Orya */ 211 /** 212 * Runic 213 * @stable ICU 2.4 214 */ 215 public static final int RUNIC = 32; /* Runr */ 216 /** 217 * Sinhala 218 * @stable ICU 2.4 219 */ 220 public static final int SINHALA = 33; /* Sinh */ 221 /** 222 * Syriac 223 * @stable ICU 2.4 224 */ 225 public static final int SYRIAC = 34; /* Syrc (Syrj; Syrn; Syre) */ 226 /** 227 * Tamil 228 * @stable ICU 2.4 229 */ 230 public static final int TAMIL = 35; /* Taml */ 231 /** 232 * Telugu 233 * 
@stable ICU 2.4 234 */ 235 public static final int TELUGU = 36; /* Telu */ 236 /** 237 * Thaana 238 * @stable ICU 2.4 239 */ 240 public static final int THAANA = 37; /* Thaa */ 241 /** 242 * Thai 243 * @stable ICU 2.4 244 */ 245 public static final int THAI = 38; /* Thai */ 246 /** 247 * Tibetan 248 * @stable ICU 2.4 249 */ 250 public static final int TIBETAN = 39; /* Tibt */ 251 /** 252 * Unified Canadian Aboriginal Symbols 253 * @stable ICU 2.6 254 */ 255 public static final int CANADIAN_ABORIGINAL = 40; /* Cans */ 256 /** 257 * Unified Canadian Aboriginal Symbols (alias) 258 * @stable ICU 2.4 259 */ 260 public static final int UCAS = CANADIAN_ABORIGINAL; /* Cans */ 261 /** 262 * Yi syllables 263 * @stable ICU 2.4 264 */ 265 public static final int YI = 41; /* Yiii */ 266 /** 267 * Tagalog 268 * @stable ICU 2.4 269 */ 270 public static final int TAGALOG = 42; /* Tglg */ 271 /** 272 * Hanunoo 273 * @stable ICU 2.4 274 */ 275 public static final int HANUNOO = 43; /* Hano */ 276 /** 277 * Buhid 278 * @stable ICU 2.4 279 */ 280 public static final int BUHID = 44; /* Buhd */ 281 /** 282 * Tagbanwa 283 * @stable ICU 2.4 284 */ 285 public static final int TAGBANWA = 45; /* Tagb */ 286 /** 287 * Braille 288 * Script in Unicode 4 289 * @stable ICU 2.6 290 * 291 */ 292 public static final int BRAILLE = 46; /* Brai */ 293 /** 294 * Cypriot 295 * Script in Unicode 4 296 * @stable ICU 2.6 297 * 298 */ 299 public static final int CYPRIOT = 47; /* Cprt */ 300 /** 301 * Limbu 302 * Script in Unicode 4 303 * @stable ICU 2.6 304 * 305 */ 306 public static final int LIMBU = 48; /* Limb */ 307 /** 308 * Linear B 309 * Script in Unicode 4 310 * @stable ICU 2.6 311 * 312 */ 313 public static final int LINEAR_B = 49; /* Linb */ 314 /** 315 * Osmanya 316 * Script in Unicode 4 317 * @stable ICU 2.6 318 * 319 */ 320 public static final int OSMANYA = 50; /* Osma */ 321 /** 322 * Shavian 323 * Script in Unicode 4 324 * @stable ICU 2.6 325 * 326 */ 327 public static final int SHAVIAN = 51; 
/* Shaw */ 328 /** 329 * Tai Le 330 * Script in Unicode 4 331 * @stable ICU 2.6 332 * 333 */ 334 public static final int TAI_LE = 52; /* Tale */ 335 /** 336 * Ugaritic 337 * Script in Unicode 4 338 * @stable ICU 2.6 339 * 340 */ 341 public static final int UGARITIC = 53; /* Ugar */ 342 /** 343 * Script in Unicode 4.0.1 344 * @stable ICU 3.0 345 */ 346 public static final int KATAKANA_OR_HIRAGANA = 54; /*Hrkt */ 347 348 /** 349 * Script in Unicode 4.1 350 * @stable ICU 3.4 351 */ 352 public static final int BUGINESE = 55; /* Bugi */ 353 /** 354 * Script in Unicode 4.1 355 * @stable ICU 3.4 356 */ 357 public static final int GLAGOLITIC = 56; /* Glag */ 358 /** 359 * Script in Unicode 4.1 360 * @stable ICU 3.4 361 */ 362 public static final int KHAROSHTHI = 57; /* Khar */ 363 /** 364 * Script in Unicode 4.1 365 * @stable ICU 3.4 366 */ 367 public static final int SYLOTI_NAGRI = 58; /* Sylo */ 368 /** 369 * Script in Unicode 4.1 370 * @stable ICU 3.4 371 */ 372 public static final int NEW_TAI_LUE = 59; /* Talu */ 373 /** 374 * Script in Unicode 4.1 375 * @stable ICU 3.4 376 */ 377 public static final int TIFINAGH = 60; /* Tfng */ 378 /** 379 * Script in Unicode 4.1 380 * @stable ICU 3.4 381 */ 382 public static final int OLD_PERSIAN = 61; /* Xpeo */ 383 384 385 /** 386 * ISO 15924 script code 387 * @stable ICU 3.6 388 */ 389 public static final int BALINESE = 62; /* Bali */ 390 /** 391 * ISO 15924 script code 392 * @stable ICU 3.6 393 */ 394 public static final int BATAK = 63; /* Batk */ 395 /** 396 * ISO 15924 script code 397 * @stable ICU 3.6 398 */ 399 public static final int BLISSYMBOLS = 64; /* Blis */ 400 /** 401 * ISO 15924 script code 402 * @stable ICU 3.6 403 */ 404 public static final int BRAHMI = 65; /* Brah */ 405 /** 406 * ISO 15924 script code 407 * @stable ICU 3.6 408 */ 409 public static final int CHAM = 66; /* Cham */ 410 /** 411 * ISO 15924 script code 412 * @stable ICU 3.6 413 */ 414 public static final int CIRTH = 67; /* Cirt */ 415 /** 416 * ISO 
15924 script code 417 * @stable ICU 3.6 418 */ 419 public static final int OLD_CHURCH_SLAVONIC_CYRILLIC = 68; /* Cyrs */ 420 /** 421 * ISO 15924 script code 422 * @stable ICU 3.6 423 */ 424 public static final int DEMOTIC_EGYPTIAN = 69; /* Egyd */ 425 /** 426 * ISO 15924 script code 427 * @stable ICU 3.6 428 */ 429 public static final int HIERATIC_EGYPTIAN = 70; /* Egyh */ 430 /** 431 * ISO 15924 script code 432 * @stable ICU 3.6 433 */ 434 public static final int EGYPTIAN_HIEROGLYPHS = 71; /* Egyp */ 435 /** 436 * ISO 15924 script code 437 * @stable ICU 3.6 438 */ 439 public static final int KHUTSURI = 72; /* Geok */ 440 /** 441 * ISO 15924 script code 442 * @stable ICU 3.6 443 */ 444 public static final int SIMPLIFIED_HAN = 73; /* Hans */ 445 /** 446 * ISO 15924 script code 447 * @stable ICU 3.6 448 */ 449 public static final int TRADITIONAL_HAN = 74; /* Hant */ 450 /** 451 * ISO 15924 script code 452 * @stable ICU 3.6 453 */ 454 public static final int PAHAWH_HMONG = 75; /* Hmng */ 455 /** 456 * ISO 15924 script code 457 * @stable ICU 3.6 458 */ 459 public static final int OLD_HUNGARIAN = 76; /* Hung */ 460 /** 461 * ISO 15924 script code 462 * @stable ICU 3.6 463 */ 464 public static final int HARAPPAN_INDUS = 77; /* Inds */ 465 /** 466 * ISO 15924 script code 467 * @stable ICU 3.6 468 */ 469 public static final int JAVANESE = 78; /* Java */ 470 /** 471 * ISO 15924 script code 472 * @stable ICU 3.6 473 */ 474 public static final int KAYAH_LI = 79; /* Kali */ 475 /** 476 * ISO 15924 script code 477 * @stable ICU 3.6 478 */ 479 public static final int LATIN_FRAKTUR = 80; /* Latf */ 480 /** 481 * ISO 15924 script code 482 * @stable ICU 3.6 483 */ 484 public static final int LATIN_GAELIC = 81; /* Latg */ 485 /** 486 * ISO 15924 script code 487 * @stable ICU 3.6 488 */ 489 public static final int LEPCHA = 82; /* Lepc */ 490 /** 491 * ISO 15924 script code 492 * @stable ICU 3.6 493 */ 494 public static final int LINEAR_A = 83; /* Lina */ 495 /** 496 * ISO 15924 
script code 497 * @stable ICU 4.6 498 */ 499 public static final int MANDAIC = 84; /* Mand */ 500 /** 501 * ISO 15924 script code (alias of {@link #MANDAIC}) 502 * @stable ICU 3.6 503 */ 504 public static final int MANDAEAN = MANDAIC; 505 /** 506 * ISO 15924 script code 507 * @stable ICU 3.6 508 */ 509 public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */ 510 /** 511 * ISO 15924 script code 512 * @stable ICU 4.6 513 */ 514 public static final int MEROITIC_HIEROGLYPHS = 86; /* Mero */ 515 /** 516 * ISO 15924 script code (alias of {@link #MEROITIC_HIEROGLYPHS}) 517 * @stable ICU 3.6 518 */ 519 public static final int MEROITIC = MEROITIC_HIEROGLYPHS; 520 /** 521 * ISO 15924 script code 522 * @stable ICU 3.6 523 */ 524 public static final int NKO = 87; /* Nkoo */ 525 /** 526 * ISO 15924 script code 527 * @stable ICU 3.6 528 */ 529 public static final int ORKHON = 88; /* Orkh */ 530 /** 531 * ISO 15924 script code 532 * @stable ICU 3.6 533 */ 534 public static final int OLD_PERMIC = 89; /* Perm */ 535 /** 536 * ISO 15924 script code 537 * @stable ICU 3.6 538 */ 539 public static final int PHAGS_PA = 90; /* Phag */ 540 /** 541 * ISO 15924 script code 542 * @stable ICU 3.6 543 */ 544 public static final int PHOENICIAN = 91; /* Phnx */ 545 /** 546 * ISO 15924 script code 547 * @stable ICU 52 548 */ 549 public static final int MIAO = 92; /* Plrd */ 550 /** 551 * ISO 15924 script code (alias of {@link #MIAO}) 552 * @stable ICU 3.6 553 */ 554 public static final int PHONETIC_POLLARD = MIAO; 555 /** 556 * ISO 15924 script code 557 * @stable ICU 3.6 558 */ 559 public static final int RONGORONGO = 93; /* Roro */ 560 /** 561 * ISO 15924 script code 562 * @stable ICU 3.6 563 */ 564 public static final int SARATI = 94; /* Sara */ 565 /** 566 * ISO 15924 script code 567 * @stable ICU 3.6 568 */ 569 public static final int ESTRANGELO_SYRIAC = 95; /* Syre */ 570 /** 571 * ISO 15924 script code 572 * @stable ICU 3.6 573 */ 574 public static final int WESTERN_SYRIAC = 96; /* Syrj */ 575 /** 576 * ISO 15924 script code 577 * @stable ICU 3.6 578 */ 579 public static final int 
EASTERN_SYRIAC = 97; /* Syrn */ 580 /** 581 * ISO 15924 script code 582 * @stable ICU 3.6 583 */ 584 public static final int TENGWAR = 98; /* Teng */ 585 /** 586 * ISO 15924 script code 587 * @stable ICU 3.6 588 */ 589 public static final int VAI = 99; /* Vaii */ 590 /** 591 * ISO 15924 script code 592 * @stable ICU 3.6 593 */ 594 public static final int VISIBLE_SPEECH = 100;/* Visp */ 595 /** 596 * ISO 15924 script code 597 * @stable ICU 3.6 598 */ 599 public static final int CUNEIFORM = 101;/* Xsux */ 600 /** 601 * ISO 15924 script code 602 * @stable ICU 3.6 603 */ 604 public static final int UNWRITTEN_LANGUAGES = 102;/* Zxxx */ 605 /** 606 * ISO 15924 script code 607 * @stable ICU 3.6 608 */ 609 public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ 610 611 /** 612 * ISO 15924 script code 613 * @stable ICU 3.8 614 */ 615 public static final int CARIAN = 104;/* Cari */ 616 /** 617 * ISO 15924 script code 618 * @stable ICU 3.8 619 */ 620 public static final int JAPANESE = 105;/* Jpan */ 621 /** 622 * ISO 15924 script code 623 * @stable ICU 3.8 624 */ 625 public static final int LANNA = 106;/* Lana */ 626 /** 627 * ISO 15924 script code 628 * @stable ICU 3.8 629 */ 630 public static final int LYCIAN = 107;/* Lyci */ 631 /** 632 * ISO 15924 script code 633 * @stable ICU 3.8 634 */ 635 public static final int LYDIAN = 108;/* Lydi */ 636 /** 637 * ISO 15924 script code 638 * @stable ICU 3.8 639 */ 640 public static final int OL_CHIKI = 109;/* Olck */ 641 /** 642 * ISO 15924 script code 643 * @stable ICU 3.8 644 */ 645 public static final int REJANG = 110;/* Rjng */ 646 /** 647 * ISO 15924 script code 648 * @stable ICU 3.8 649 */ 650 public static final int SAURASHTRA = 111;/* Saur */ 651 /** 652 * ISO 15924 script code for Sutton SignWriting 653 * @stable ICU 3.8 654 */ 655 public static final int SIGN_WRITING = 112;/* Sgnw */ 656 /** 657 * ISO 15924 script code 658 * @stable ICU 3.8 659 */ 660 public 
static final int SUNDANESE = 113;/* Sund */ 661 /** 662 * ISO 15924 script code 663 * @stable ICU 3.8 664 */ 665 public static final int MOON = 114;/* Moon */ 666 /** 667 * ISO 15924 script code 668 * @stable ICU 3.8 669 */ 670 public static final int MEITEI_MAYEK = 115;/* Mtei */ 671 672 /** 673 * ISO 15924 script code 674 * @stable ICU 4.0 675 */ 676 public static final int IMPERIAL_ARAMAIC = 116;/* Armi */ 677 678 /** 679 * ISO 15924 script code 680 * @stable ICU 4.0 681 */ 682 public static final int AVESTAN = 117;/* Avst */ 683 684 /** 685 * ISO 15924 script code 686 * @stable ICU 4.0 687 */ 688 public static final int CHAKMA = 118;/* Cakm */ 689 690 /** 691 * ISO 15924 script code 692 * @stable ICU 4.0 693 */ 694 public static final int KOREAN = 119;/* Kore */ 695 696 /** 697 * ISO 15924 script code 698 * @stable ICU 4.0 699 */ 700 public static final int KAITHI = 120;/* Kthi */ 701 702 /** 703 * ISO 15924 script code 704 * @stable ICU 4.0 705 */ 706 public static final int MANICHAEAN = 121;/* Mani */ 707 708 /** 709 * ISO 15924 script code 710 * @stable ICU 4.0 711 */ 712 public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */ 713 714 /** 715 * ISO 15924 script code 716 * @stable ICU 4.0 717 */ 718 public static final int PSALTER_PAHLAVI = 123;/* Phlp */ 719 720 /** 721 * ISO 15924 script code 722 * @stable ICU 4.0 723 */ 724 public static final int BOOK_PAHLAVI = 124;/* Phlv */ 725 726 /** 727 * ISO 15924 script code 728 * @stable ICU 4.0 729 */ 730 public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */ 731 732 /** 733 * ISO 15924 script code 734 * @stable ICU 4.0 735 */ 736 public static final int SAMARITAN = 126;/* Samr */ 737 738 /** 739 * ISO 15924 script code 740 * @stable ICU 4.0 741 */ 742 public static final int TAI_VIET = 127;/* Tavt */ 743 744 /** 745 * ISO 15924 script code 746 * @stable ICU 4.0 747 */ 748 public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */ 749 750 /** 751 * ISO 15924 script code 752 * @stable ICU 4.0 
753 */ 754 public static final int SYMBOLS = 129;/* Zsym */ 755 756 /** 757 * ISO 15924 script code 758 * @stable ICU 4.4 759 */ 760 public static final int BAMUM = 130;/* Bamu */ 761 /** 762 * ISO 15924 script code 763 * @stable ICU 4.4 764 */ 765 public static final int LISU = 131;/* Lisu */ 766 /** 767 * ISO 15924 script code 768 * @stable ICU 4.4 769 */ 770 public static final int NAKHI_GEBA = 132;/* Nkgb */ 771 /** 772 * ISO 15924 script code 773 * @stable ICU 4.4 774 */ 775 public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */ 776 777 /** 778 * ISO 15924 script code 779 * @stable ICU 4.6 780 */ 781 public static final int BASSA_VAH = 134;/* Bass */ 782 /** 783 * ISO 15924 script code 784 * @stable ICU 54 785 */ 786 public static final int DUPLOYAN = 135;/* Dupl */ 787 /** 788 * Typo, use DUPLOYAN 789 * @deprecated ICU 54 790 */ 791 @Deprecated 792 public static final int DUPLOYAN_SHORTAND = DUPLOYAN; 793 /** 794 * ISO 15924 script code 795 * @stable ICU 4.6 796 */ 797 public static final int ELBASAN = 136;/* Elba */ 798 /** 799 * ISO 15924 script code 800 * @stable ICU 4.6 801 */ 802 public static final int GRANTHA = 137;/* Gran */ 803 /** 804 * ISO 15924 script code 805 * @stable ICU 4.6 806 */ 807 public static final int KPELLE = 138;/* Kpel */ 808 /** 809 * ISO 15924 script code 810 * @stable ICU 4.6 811 */ 812 public static final int LOMA = 139;/* Loma */ 813 /** 814 * Mende Kikakui 815 * ISO 15924 script code 816 * @stable ICU 4.6 817 */ 818 public static final int MENDE = 140;/* Mend */ 819 /** 820 * ISO 15924 script code 821 * @stable ICU 4.6 822 */ 823 public static final int MEROITIC_CURSIVE = 141;/* Merc */ 824 /** 825 * ISO 15924 script code 826 * @stable ICU 4.6 827 */ 828 public static final int OLD_NORTH_ARABIAN = 142;/* Narb */ 829 /** 830 * ISO 15924 script code 831 * @stable ICU 4.6 832 */ 833 public static final int NABATAEAN = 143;/* Nbat */ 834 /** 835 * ISO 15924 script code 836 * @stable ICU 4.6 837 */ 838 public static final int 
PALMYRENE = 144;/* Palm */ 839 /** 840 * ISO 15924 script code 841 * @stable ICU 54 842 */ 843 public static final int KHUDAWADI = 145;/* Sind */ 844 /** 845 * ISO 15924 script code (alias of {@link #KHUDAWADI}) 846 * @stable ICU 4.6 847 */ 848 public static final int SINDHI = KHUDAWADI; 849 /** 850 * ISO 15924 script code 851 * @stable ICU 4.6 852 */ 853 public static final int WARANG_CITI = 146;/* Wara */ 854 855 /** 856 * ISO 15924 script code 857 * @stable ICU 4.8 858 */ 859 public static final int AFAKA = 147;/* Afak */ 860 /** 861 * ISO 15924 script code 862 * @stable ICU 4.8 863 */ 864 public static final int JURCHEN = 148;/* Jurc */ 865 /** 866 * ISO 15924 script code 867 * @stable ICU 4.8 868 */ 869 public static final int MRO = 149;/* Mroo */ 870 /** 871 * ISO 15924 script code 872 * @stable ICU 4.8 873 */ 874 public static final int NUSHU = 150;/* Nshu */ 875 /** 876 * ISO 15924 script code 877 * @stable ICU 4.8 878 */ 879 public static final int SHARADA = 151;/* Shrd */ 880 /** 881 * ISO 15924 script code 882 * @stable ICU 4.8 883 */ 884 public static final int SORA_SOMPENG = 152;/* Sora */ 885 /** 886 * ISO 15924 script code 887 * @stable ICU 4.8 888 */ 889 public static final int TAKRI = 153;/* Takr */ 890 /** 891 * ISO 15924 script code 892 * @stable ICU 4.8 893 */ 894 public static final int TANGUT = 154;/* Tang */ 895 /** 896 * ISO 15924 script code 897 * @stable ICU 4.8 898 */ 899 public static final int WOLEAI = 155;/* Wole */ 900 901 /** 902 * ISO 15924 script code 903 * @stable ICU 49 904 */ 905 public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */ 906 /** 907 * ISO 15924 script code 908 * @stable ICU 49 909 */ 910 public static final int KHOJKI = 157;/* Khoj */ 911 /** 912 * ISO 15924 script code 913 * @stable ICU 49 914 */ 915 public static final int TIRHUTA = 158;/* Tirh */ 916 /** 917 * ISO 15924 script code 918 * @stable ICU 52 919 */ 920 public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */ 921 /** 922 * ISO 15924 script code 923 * @stable ICU 52 924 
*/ 925 public static final int MAHAJANI = 160; /* Mahj */ 926 927 /** 928 * ISO 15924 script code 929 * @stable ICU 54 930 */ 931 public static final int AHOM = 161; /* Ahom */ 932 /** 933 * ISO 15924 script code 934 * @stable ICU 54 935 */ 936 public static final int HATRAN = 162; /* Hatr */ 937 /** 938 * ISO 15924 script code 939 * @stable ICU 54 940 */ 941 public static final int MODI = 163; /* Modi */ 942 /** 943 * ISO 15924 script code 944 * @stable ICU 54 945 */ 946 public static final int MULTANI = 164; /* Mult */ 947 /** 948 * ISO 15924 script code 949 * @stable ICU 54 950 */ 951 public static final int PAU_CIN_HAU = 165; /* Pauc */ 952 /** 953 * ISO 15924 script code 954 * @stable ICU 54 955 */ 956 public static final int SIDDHAM = 166; /* Sidd */ 957 958 /** 959 * ISO 15924 script code 960 * @stable ICU 58 961 */ 962 public static final int ADLAM = 167; /* Adlm */ 963 /** 964 * ISO 15924 script code 965 * @stable ICU 58 966 */ 967 public static final int BHAIKSUKI = 168; /* Bhks */ 968 /** 969 * ISO 15924 script code 970 * @stable ICU 58 971 */ 972 public static final int MARCHEN = 169; /* Marc */ 973 /** 974 * ISO 15924 script code 975 * @stable ICU 58 976 */ 977 public static final int NEWA = 170; /* Newa */ 978 /** 979 * ISO 15924 script code 980 * @stable ICU 58 981 */ 982 public static final int OSAGE = 171; /* Osge */ 983 984 /** 985 * ISO 15924 script code 986 * @stable ICU 58 987 */ 988 public static final int HAN_WITH_BOPOMOFO = 172; /* Hanb */ 989 /** 990 * ISO 15924 script code 991 * @stable ICU 58 992 */ 993 public static final int JAMO = 173; /* Jamo */ 994 /** 995 * ISO 15924 script code 996 * @stable ICU 58 997 */ 998 public static final int SYMBOLS_EMOJI = 174; /* Zsye */ 999 1000 /** 1001 * ISO 15924 script code 1002 * @stable ICU 60 1003 */ 1004 public static final int MASARAM_GONDI = 175; /* Gonm */ 1005 /** 1006 * ISO 15924 script code 1007 * @stable ICU 60 1008 */ 1009 public static final int SOYOMBO = 176; /* Soyo */ 1010 /** 1011 * 
ISO 15924 script code 1012 * @stable ICU 60 1013 */ 1014 public static final int ZANABAZAR_SQUARE = 177; /* Zanb */ 1015 1016 /** 1017 * ISO 15924 script code 1018 * @stable ICU 62 1019 */ 1020 public static final int DOGRA = 178; /* Dogr */ 1021 /** @stable ICU 62 */ 1022 public static final int GUNJALA_GONDI = 179; /* Gong */ 1023 /** @stable ICU 62 */ 1024 public static final int MAKASAR = 180; /* Maka */ 1025 /** @stable ICU 62 */ 1026 public static final int MEDEFAIDRIN = 181; /* Medf */ 1027 /** @stable ICU 62 */ 1028 public static final int HANIFI_ROHINGYA = 182; /* Rohg */ 1029 /** @stable ICU 62 */ 1030 public static final int SOGDIAN = 183; /* Sogd */ 1031 /** @stable ICU 62 */ 1032 public static final int OLD_SOGDIAN = 184; /* Sogo */ 1033 1034 /** @stable ICU 64 */ 1035 public static final int ELYMAIC = 185; /* Elym */ 1036 /** @stable ICU 64 */ 1037 public static final int NYIAKENG_PUACHUE_HMONG = 186; /* Hmnp */ 1038 /** @stable ICU 64 */ 1039 public static final int NANDINAGARI = 187; /* Nand */ 1040 /** @stable ICU 64 */ 1041 public static final int WANCHO = 188; /* Wcho */ 1042 1043 /** @stable ICU 66 */ 1044 public static final int CHORASMIAN = 189; /* Chrs */ 1045 /** @stable ICU 66 */ 1046 public static final int DIVES_AKURU = 190; /* Diak */ 1047 /** @stable ICU 66 */ 1048 public static final int KHITAN_SMALL_SCRIPT = 191; /* Kits */ 1049 /** @stable ICU 66 */ 1050 public static final int YEZIDI = 192; /* Yezi */ 1051 1052 /** @stable ICU 70 */ 1053 public static final int CYPRO_MINOAN = 193; /* Cpmn */ 1054 /** @stable ICU 70 */ 1055 public static final int OLD_UYGHUR = 194; /* Ougr */ 1056 /** @stable ICU 70 */ 1057 public static final int TANGSA = 195; /* Tnsa */ 1058 /** @stable ICU 70 */ 1059 public static final int TOTO = 196; /* Toto */ 1060 /** @stable ICU 70 */ 1061 public static final int VITHKUQI = 197; /* Vith */ 1062 1063 /** @stable ICU 72 */ 1064 public static final int KAWI = 198; /* Kawi */ 1065 /** @stable ICU 72 */ 1066 
public static final int NAG_MUNDARI = 199; /* Nagm */ 1067 1068 /** @stable ICU 75 */ 1069 public static final int ARABIC_NASTALIQ = 200; /* Aran */ 1070 1071 /** 1072 * One more than the highest normal UScript code. 1073 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT). 1074 * 1075 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 1076 */ 1077 @Deprecated 1078 public static final int CODE_LIMIT = 201; 1079 getCodesFromLocale(ULocale locale)1080 private static int[] getCodesFromLocale(ULocale locale) { 1081 // Multi-script languages, equivalent to the LocaleScript data 1082 // that we used to load from locale resource bundles. 1083 String lang = locale.getLanguage(); 1084 if(lang.equals("ja")) { 1085 return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN }; 1086 } 1087 if(lang.equals("ko")) { 1088 return new int[] { UScript.HANGUL, UScript.HAN }; 1089 } 1090 String script = locale.getScript(); 1091 if(lang.equals("zh") && script.equals("Hant")) { 1092 return new int[] { UScript.HAN, UScript.BOPOMOFO }; 1093 } 1094 // Explicit script code. 1095 if(script.length() != 0) { 1096 int scriptCode = UScript.getCodeFromName(script); 1097 if(scriptCode != UScript.INVALID_CODE) { 1098 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) { 1099 scriptCode = UScript.HAN; 1100 } 1101 return new int[] { scriptCode }; 1102 } 1103 } 1104 return null; 1105 } 1106 1107 /** 1108 * Helper function to find the code from locale. 1109 * @param locale The locale. 1110 */ findCodeFromLocale(ULocale locale)1111 private static int[] findCodeFromLocale(ULocale locale) { 1112 int[] result = getCodesFromLocale(locale); 1113 if(result != null) { 1114 return result; 1115 } 1116 ULocale likely = ULocale.addLikelySubtags(locale); 1117 return getCodesFromLocale(likely); 1118 } 1119 1120 /** 1121 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 
1122 * Returns MALAYAM given "Malayam" OR "Mlym". 1123 * Returns LATIN given "en" OR "en_US" 1124 * @param locale Locale 1125 * @return The script codes array. null if the the code cannot be found. 1126 * @stable ICU 2.4 1127 */ getCode(Locale locale)1128 public static final int[] getCode(Locale locale){ 1129 return findCodeFromLocale(ULocale.forLocale(locale)); 1130 } 1131 /** 1132 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 1133 * Returns MALAYAM given "Malayam" OR "Mlym". 1134 * Returns LATIN given "en" OR "en_US" 1135 * @param locale ULocale 1136 * @return The script codes array. null if the the code cannot be found. 1137 * @stable ICU 3.0 1138 */ getCode(ULocale locale)1139 public static final int[] getCode(ULocale locale){ 1140 return findCodeFromLocale(locale); 1141 } 1142 /** 1143 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. 1144 * Returns MALAYAM given "Malayam" OR "Mlym". 1145 * Returns LATIN given "en" OR "en_US" 1146 * 1147 * <p>Note: To search by short or long script alias only, use 1148 * {@link #getCodeFromName(String)} instead. 1149 * That does a fast lookup with no access of the locale data. 1150 * 1151 * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale 1152 * @return The script codes array. null if the the code cannot be found. 1153 * @stable ICU 2.4 1154 */ getCode(String nameOrAbbrOrLocale)1155 public static final int[] getCode(String nameOrAbbrOrLocale) { 1156 boolean triedCode = false; 1157 int lastSepPos = nameOrAbbrOrLocale.indexOf('_'); 1158 if (lastSepPos < 0) { 1159 lastSepPos = nameOrAbbrOrLocale.indexOf('-'); 1160 } 1161 // Favor interpretation of nameOrAbbrOrLocale as a script alias if either 1162 // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc. 1163 // 2. The last instance of -/_ is at offset 3, and the portion after that is 1164 // longer than 4 characters (i.e. not a script or region code). 
This handles 1165 // Old_Hungarian, Old_Italic, etc. ("old" is a valid language code) 1166 // 3. The last instance of -/_ is at offset 7, and the portion after that is 1167 // 3 characters. This handles New_Tai_Lue ("new" is a valid language code). 1168 if ( lastSepPos < 0 1169 || (lastSepPos == 3 && nameOrAbbrOrLocale.length() > 8) 1170 || (lastSepPos == 7 && nameOrAbbrOrLocale.length() == 11) ) { 1171 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 1172 if (propNum != UProperty.UNDEFINED) { 1173 return new int[] {propNum}; 1174 } 1175 triedCode = true; 1176 } 1177 int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale)); 1178 if (scripts != null) { 1179 return scripts; 1180 } 1181 if (!triedCode) { 1182 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 1183 if (propNum != UProperty.UNDEFINED) { 1184 return new int[] {propNum}; 1185 } 1186 } 1187 return null; 1188 } 1189 1190 /** 1191 * Returns the script code associated with the given Unicode script property alias 1192 * (name or abbreviation). 1193 * Short aliases are ISO 15924 script codes. 1194 * Returns MALAYAM given "Malayam" OR "Mlym". 1195 * 1196 * @param nameOrAbbr name of the script or ISO 15924 code 1197 * @return The script code value, or INVALID_CODE if the code cannot be found. 1198 * @stable ICU 54 1199 */ getCodeFromName(String nameOrAbbr)1200 public static final int getCodeFromName(String nameOrAbbr) { 1201 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr); 1202 return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum; 1203 } 1204 1205 /** 1206 * Gets the script code associated with the given codepoint. 
1207 * Returns UScript.MALAYAM given 0x0D02 1208 * @param codepoint UChar32 codepoint 1209 * @return The script code 1210 * @stable ICU 2.4 1211 */ getScript(int codepoint)1212 public static final int getScript(int codepoint){ 1213 if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) { 1214 int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK; 1215 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1216 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1217 return codeOrIndex; 1218 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) { 1219 return UScript.COMMON; 1220 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1221 return UScript.INHERITED; 1222 } else { 1223 return UCharacterProperty.INSTANCE.m_scriptExtensions_[codeOrIndex]; 1224 } 1225 }else{ 1226 throw new IllegalArgumentException(Integer.toString(codepoint)); 1227 } 1228 } 1229 1230 /** 1231 * Do the Script_Extensions of code point c contain script sc? 1232 * If c does not have explicit Script_Extensions, then this tests whether 1233 * c has the Script property value sc. 1234 * 1235 * <p>Some characters are commonly used in multiple scripts. 1236 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 
1237 * 1238 * @param c code point 1239 * @param sc script code 1240 * @return true if sc is in Script_Extensions(c) 1241 * @stable ICU 49 1242 */ hasScript(int c, int sc)1243 public static final boolean hasScript(int c, int sc) { 1244 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1245 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1246 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1247 return sc==codeOrIndex; 1248 } 1249 1250 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1251 int scx=codeOrIndex; // index into scriptExtensions 1252 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1253 scx=scriptExtensions[scx+1]; 1254 } 1255 if(sc>0x7fff) { 1256 // Guard against bogus input that would 1257 // make us go past the Script_Extensions terminator. 1258 return false; 1259 } 1260 while(sc>scriptExtensions[scx]) { 1261 ++scx; 1262 } 1263 return sc==(scriptExtensions[scx]&0x7fff); 1264 } 1265 1266 /** 1267 * Sets code point c's Script_Extensions as script code integers into the output BitSet. 1268 * <ul> 1269 * <li>If c does have Script_Extensions, then the return value is 1270 * the negative number of Script_Extensions codes (= -set.cardinality()); 1271 * in this case, the Script property value 1272 * (normally Common or Inherited) is not included in the set. 1273 * <li>If c does not have Script_Extensions, then the one Script code is put into the set 1274 * and also returned. 1275 * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set 1276 * and also returned. 1277 * </ul> 1278 * In other words, if the return value is non-negative, it is c's single Script code 1279 * and the set contains exactly this Script code. 1280 * If the return value is -n, then the set contains c's n>=2 Script_Extensions script codes. 1281 * 1282 * <p>Some characters are commonly used in multiple scripts. 
1283 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 1284 * 1285 * @param c code point 1286 * @param set set of script code integers; will be cleared, then bits are set 1287 * corresponding to c's Script_Extensions 1288 * @return negative number of script codes in c's Script_Extensions, 1289 * or the non-negative single Script value 1290 * @stable ICU 49 1291 */ getScriptExtensions(int c, BitSet set)1292 public static final int getScriptExtensions(int c, BitSet set) { 1293 set.clear(); 1294 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1295 int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX); 1296 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1297 set.set(codeOrIndex); 1298 return codeOrIndex; 1299 } 1300 1301 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1302 int scx=codeOrIndex; // index into scriptExtensions 1303 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1304 scx=scriptExtensions[scx+1]; 1305 } 1306 int length=0; 1307 int sx; 1308 do { 1309 sx=scriptExtensions[scx++]; 1310 set.set(sx&0x7fff); 1311 ++length; 1312 } while(sx<0x8000); 1313 // length==set.cardinality() 1314 return -length; 1315 } 1316 1317 /** 1318 * Returns the long Unicode script name, if there is one. 1319 * Otherwise returns the 4-letter ISO 15924 script code. 1320 * Returns "Malayam" given MALAYALAM. 
/**
 * Returns the long Unicode script name, if there is one.
 * Otherwise returns the 4-letter ISO 15924 script code.
 * Returns "Malayalam" given MALAYALAM.
 *
 * @param scriptCode int script code
 * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code
 * @throws IllegalArgumentException if the script code is not valid
 * @stable ICU 2.4
 */
public static final String getName(int scriptCode){
    return UCharacter.getPropertyValueName(UProperty.SCRIPT,
            scriptCode,
            UProperty.NameChoice.LONG);
}

/**
 * Returns the 4-letter ISO 15924 script code,
 * which is the same as the short Unicode script name if Unicode has names for the script.
 * Returns "Mlym" given MALAYALAM.
 *
 * @param scriptCode int script code
 * @return short script name (4-letter code)
 * @throws IllegalArgumentException if the script code is not valid
 * @stable ICU 2.4
 */
public static final String getShortName(int scriptCode){
    return UCharacter.getPropertyValueName(UProperty.SCRIPT,
            scriptCode,
            UProperty.NameChoice.SHORT);
}

/**
 * Script metadata (script properties).
 * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
 *
 * <p>Each script's properties are packed into one int of SCRIPT_PROPS,
 * which is indexed by script code: a sample character, a usage value, and single-bit flags.
 */
private static final class ScriptMetadata {
    // 0 = NOT_ENCODED, no sample character, default false script properties.
    // Bits 20.. 0: sample character

    // Bits 23..21: usage
    // The public accessor getUsage() uses this field as an index into ScriptUsage.values(),
    // so these values must stay aligned with the order of the ScriptUsage constants.
    private static final int UNKNOWN = 1 << 21;
    private static final int EXCLUSION = 2 << 21;
    private static final int LIMITED_USE = 3 << 21;
    // private static final int ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
    private static final int RECOMMENDED = 5 << 21;

    // Bits 31..24: Single-bit flags
    private static final int RTL = 1 << 24;         // tested by isRightToLeft()
    private static final int LB_LETTERS = 1 << 25;  // tested by breaksBetweenLetters()
    private static final int CASED = 1 << 26;       // tested by isCased()

    // One entry per script code, in script code order (index 0 is Zyyy).
    // A plain 0 entry means NOT_ENCODED (see the note at the top of this class).
    // The table is generated; do not edit entries by hand.
    private static final int SCRIPT_PROPS[] = {
        // Begin copy-paste output from
        // tools/trunk/unicode/py/parsescriptmetadata.py
        // or from icu/trunk/source/common/uscript_props.cpp
        0x0040 | RECOMMENDED,  // Zyyy
        0x0308 | RECOMMENDED,  // Zinh
        0x0628 | RECOMMENDED | RTL,  // Arab
        0x0531 | RECOMMENDED | CASED,  // Armn
        0x0995 | RECOMMENDED,  // Beng
        0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
        0x13C4 | LIMITED_USE | CASED,  // Cher
        0x03E2 | EXCLUSION | CASED,  // Copt
        0x042F | RECOMMENDED | CASED,  // Cyrl
        0x10414 | EXCLUSION | CASED,  // Dsrt
        0x0905 | RECOMMENDED,  // Deva
        0x12A0 | RECOMMENDED,  // Ethi
        0x10D3 | RECOMMENDED,  // Geor
        0x10330 | EXCLUSION,  // Goth
        0x03A9 | RECOMMENDED | CASED,  // Grek
        0x0A95 | RECOMMENDED,  // Gujr
        0x0A15 | RECOMMENDED,  // Guru
        0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
        0xAC00 | RECOMMENDED,  // Hang
        0x05D0 | RECOMMENDED | RTL,  // Hebr
        0x304B | RECOMMENDED | LB_LETTERS,  // Hira
        0x0C95 | RECOMMENDED,  // Knda
        0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
        0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
        0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
        0x004C | RECOMMENDED | CASED,  // Latn
        0x0D15 | RECOMMENDED,  // Mlym
        0x1826 | EXCLUSION,  // Mong
        0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
        0x168F | EXCLUSION,  // Ogam
        0x10300 | EXCLUSION,  // Ital
        0x0B15 | RECOMMENDED,  // Orya
        0x16A0 | EXCLUSION,  // Runr
        0x0D85 | RECOMMENDED,  // Sinh
        0x0710 | LIMITED_USE | RTL,  // Syrc
        0x0B95 | RECOMMENDED,  // Taml
        0x0C15 | RECOMMENDED,  // Telu
        0x078C | RECOMMENDED | RTL,  // Thaa
        0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
        0x0F40 | RECOMMENDED,  // Tibt
        0x14C0 | LIMITED_USE,  // Cans
        0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
        0x1703 | EXCLUSION,  // Tglg
        0x1723 | EXCLUSION,  // Hano
        0x1743 | EXCLUSION,  // Buhd
        0x1763 | EXCLUSION,  // Tagb
        0x280E | UNKNOWN,  // Brai
        0x10800 | EXCLUSION | RTL,  // Cprt
        0x1900 | LIMITED_USE,  // Limb
        0x10000 | EXCLUSION,  // Linb
        0x10480 | EXCLUSION,  // Osma
        0x10450 | EXCLUSION,  // Shaw
        0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
        0x10380 | EXCLUSION,  // Ugar
        0,
        0x1A00 | EXCLUSION,  // Bugi
        0x2C00 | EXCLUSION | CASED,  // Glag
        0x10A00 | EXCLUSION | RTL,  // Khar
        0xA800 | LIMITED_USE,  // Sylo
        0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
        0x2D30 | LIMITED_USE,  // Tfng
        0x103A0 | EXCLUSION,  // Xpeo
        0x1B05 | LIMITED_USE,  // Bali
        0x1BC0 | LIMITED_USE,  // Batk
        0,
        0x11005 | EXCLUSION,  // Brah
        0xAA00 | LIMITED_USE,  // Cham
        0,
        0,
        0,
        0,
        0x13153 | EXCLUSION,  // Egyp
        0,
        0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
        0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
        0x16B1C | EXCLUSION,  // Hmng
        0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
        0,
        0xA984 | LIMITED_USE,  // Java
        0xA90A | LIMITED_USE,  // Kali
        0,
        0,
        0x1C00 | LIMITED_USE,  // Lepc
        0x10647 | EXCLUSION,  // Lina
        0x0840 | LIMITED_USE | RTL,  // Mand
        0,
        0x10980 | EXCLUSION | RTL,  // Mero
        0x07CA | LIMITED_USE | RTL,  // Nkoo
        0x10C00 | EXCLUSION | RTL,  // Orkh
        0x1036B | EXCLUSION,  // Perm
        0xA840 | EXCLUSION,  // Phag
        0x10900 | EXCLUSION | RTL,  // Phnx
        0x16F00 | LIMITED_USE,  // Plrd
        0,
        0,
        0,
        0,
        0,
        0,
        0xA549 | LIMITED_USE,  // Vaii
        0,
        0x12000 | EXCLUSION,  // Xsux
        0,
        0xFDD0 | UNKNOWN,  // Zzzz
        0x102A0 | EXCLUSION,  // Cari
        0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
        0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
        0x10280 | EXCLUSION,  // Lyci
        0x10920 | EXCLUSION | RTL,  // Lydi
        0x1C5A | LIMITED_USE,  // Olck
        0xA930 | EXCLUSION,  // Rjng
        0xA882 | LIMITED_USE,  // Saur
        0x1D850 | EXCLUSION,  // Sgnw
        0x1B83 | LIMITED_USE,  // Sund
        0,
        0xABC0 | LIMITED_USE,  // Mtei
        0x10840 | EXCLUSION | RTL,  // Armi
        0x10B00 | EXCLUSION | RTL,  // Avst
        0x11103 | LIMITED_USE,  // Cakm
        0xAC00 | RECOMMENDED,  // Kore
        0x11083 | EXCLUSION,  // Kthi
        0x10AD8 | EXCLUSION | RTL,  // Mani
        0x10B60 | EXCLUSION | RTL,  // Phli
        0x10B8F | EXCLUSION | RTL,  // Phlp
        0,
        0x10B40 | EXCLUSION | RTL,  // Prti
        0x0800 | EXCLUSION | RTL,  // Samr
        0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
        0,
        0,
        0xA6A0 | LIMITED_USE,  // Bamu
        0xA4D0 | LIMITED_USE,  // Lisu
        0,
        0x10A60 | EXCLUSION | RTL,  // Sarb
        0x16AE6 | EXCLUSION,  // Bass
        0x1BC20 | EXCLUSION,  // Dupl
        0x10500 | EXCLUSION,  // Elba
        0x11315 | EXCLUSION,  // Gran
        0,
        0,
        0x1E802 | EXCLUSION | RTL,  // Mend
        0x109A0 | EXCLUSION | RTL,  // Merc
        0x10A95 | EXCLUSION | RTL,  // Narb
        0x10896 | EXCLUSION | RTL,  // Nbat
        0x10873 | EXCLUSION | RTL,  // Palm
        0x112BE | EXCLUSION,  // Sind
        0x118B4 | EXCLUSION | CASED,  // Wara
        0,
        0,
        0x16A4F | EXCLUSION,  // Mroo
        0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
        0x11183 | EXCLUSION,  // Shrd
        0x110D0 | EXCLUSION,  // Sora
        0x11680 | EXCLUSION,  // Takr
        0x18229 | EXCLUSION | LB_LETTERS,  // Tang
        0,
        0x14400 | EXCLUSION,  // Hluw
        0x11208 | EXCLUSION,  // Khoj
        0x11484 | EXCLUSION,  // Tirh
        0x10537 | EXCLUSION,  // Aghb
        0x11152 | EXCLUSION,  // Mahj
        0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
        0x108F4 | EXCLUSION | RTL,  // Hatr
        0x1160E | EXCLUSION,  // Modi
        0x1128F | EXCLUSION,  // Mult
        0x11AC0 | EXCLUSION,  // Pauc
        0x1158E | EXCLUSION,  // Sidd
        0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
        0x11C0E | EXCLUSION,  // Bhks
        0x11C72 | EXCLUSION,  // Marc
        0x11412 | LIMITED_USE,  // Newa
        0x104B5 | LIMITED_USE | CASED,  // Osge
        0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
        0x1112 | RECOMMENDED,  // Jamo
        0,
        0x11D10 | EXCLUSION,  // Gonm
        0x11A5C | EXCLUSION,  // Soyo
        0x11A0B | EXCLUSION,  // Zanb
        0x1180B | EXCLUSION,  // Dogr
        0x11D71 | LIMITED_USE,  // Gong
        0x11EE5 | EXCLUSION,  // Maka
        0x16E40 | EXCLUSION | CASED,  // Medf
        0x10D12 | LIMITED_USE | RTL,  // Rohg
        0x10F42 | EXCLUSION | RTL,  // Sogd
        0x10F19 | EXCLUSION | RTL,  // Sogo
        0x10FF1 | EXCLUSION | RTL,  // Elym
        0x1E108 | LIMITED_USE,  // Hmnp
        0x119CE | EXCLUSION,  // Nand
        0x1E2E1 | LIMITED_USE,  // Wcho
        0x10FBF | EXCLUSION | RTL,  // Chrs
        0x1190C | EXCLUSION,  // Diak
        0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
        0x10E88 | EXCLUSION | RTL,  // Yezi
        0x12FE5 | EXCLUSION,  // Cpmn
        0x10F7C | EXCLUSION | RTL,  // Ougr
        0x16ABC | EXCLUSION,  // Tnsa
        0x1E290 | EXCLUSION,  // Toto
        0x10582 | EXCLUSION | CASED,  // Vith
        0x11F1B | EXCLUSION | LB_LETTERS,  // Kawi
        0x1E4E6 | EXCLUSION,  // Nagm
        // End copy-paste from parsescriptmetadata.py
    };

    /**
     * Returns the packed properties word for a script code.
     *
     * @param script script code, used as an index into SCRIPT_PROPS
     * @return the table entry, or 0 (NOT_ENCODED, no sample character, all flags false)
     *         if script is negative or beyond the end of the table
     */
    private static final int getScriptProps(int script) {
        if (0 <= script && script < SCRIPT_PROPS.length) {
            return SCRIPT_PROPS[script];
        } else {
            return 0;
        }
    }
}
1588 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers 1589 * 1590 * @stable ICU 51 1591 */ 1592 public enum ScriptUsage { 1593 /** 1594 * Not encoded in Unicode. 1595 * @stable ICU 51 1596 */ 1597 NOT_ENCODED, 1598 /** 1599 * Unknown script usage. 1600 * @stable ICU 51 1601 */ 1602 UNKNOWN, 1603 /** 1604 * Candidate for Exclusion from Identifiers. 1605 * @stable ICU 51 1606 */ 1607 EXCLUDED, 1608 /** 1609 * Limited Use script. 1610 * @stable ICU 51 1611 */ 1612 LIMITED_USE, 1613 /** 1614 * Aspirational Use script. 1615 * @stable ICU 51 1616 */ 1617 ASPIRATIONAL, 1618 /** 1619 * Recommended script. 1620 * @stable ICU 51 1621 */ 1622 RECOMMENDED 1623 } 1624 private static final ScriptUsage[] usageValues = ScriptUsage.values(); 1625 1626 /** 1627 * Returns the script sample character string. 1628 * This string normally consists of one code point but might be longer. 1629 * The string is empty if the script is not encoded. 1630 * 1631 * @param script script code 1632 * @return the sample character string 1633 * @stable ICU 51 1634 */ getSampleString(int script)1635 public static final String getSampleString(int script) { 1636 int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff; 1637 if(sampleChar != 0) { 1638 return new StringBuilder().appendCodePoint(sampleChar).toString(); 1639 } 1640 return ""; 1641 } 1642 1643 /** 1644 * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. 1645 * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode. 1646 * 1647 * @param script script code 1648 * @return script usage 1649 * @see ScriptUsage 1650 * @stable ICU 51 1651 */ getUsage(int script)1652 public static final ScriptUsage getUsage(int script) { 1653 return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7]; 1654 } 1655 1656 /** 1657 * Returns true if the script is written right-to-left. 1658 * For example, Arab and Hebr. 
1659 * 1660 * @param script script code 1661 * @return true if the script is right-to-left 1662 * @stable ICU 51 1663 */ isRightToLeft(int script)1664 public static final boolean isRightToLeft(int script) { 1665 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0; 1666 } 1667 1668 /** 1669 * Returns true if the script allows line breaks between letters (excluding hyphenation). 1670 * Such a script typically requires dictionary-based line breaking. 1671 * For example, Hani and Thai. 1672 * 1673 * @param script script code 1674 * @return true if the script allows line breaks between letters 1675 * @stable ICU 51 1676 */ breaksBetweenLetters(int script)1677 public static final boolean breaksBetweenLetters(int script) { 1678 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0; 1679 } 1680 1681 /** 1682 * Returns true if in modern (or most recent) usage of the script case distinctions are customary. 1683 * For example, Latn and Cyrl. 1684 * 1685 * @param script script code 1686 * @return true if the script is cased 1687 * @stable ICU 51 1688 */ isCased(int script)1689 public static final boolean isCased(int script) { 1690 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0; 1691 } 1692 1693 ///CLOVER:OFF 1694 /** 1695 * Private Constructor. Never default construct 1696 */ UScript()1697 private UScript(){} 1698 ///CLOVER:ON 1699 } 1700