1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2014, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.impl; 12 13 import java.io.IOException; 14 import java.nio.ByteBuffer; 15 import java.util.Locale; 16 import java.util.MissingResourceException; 17 18 import ohos.global.icu.lang.UCharacter; 19 import ohos.global.icu.lang.UCharacterCategory; 20 import ohos.global.icu.text.UTF16; 21 import ohos.global.icu.text.UnicodeSet; 22 23 /** 24 * Internal class to manage character names. 25 * Since data for names are stored 26 * in an array of char, by default indexes used in this class is refering to 27 * a 2 byte count, unless otherwise stated. Cases where the index is refering 28 * to a byte count, the index is halved and depending on whether the index is 29 * even or odd, the MSB or LSB of the result char at the halved index is 30 * returned. For indexes to an array of int, the index is multiplied by 2, 31 * result char at the multiplied index and its following char is returned as an 32 * int. 33 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class 34 * Note : 0 - 0x1F are control characters without names in Unicode 3.0 35 * @author Syn Wee Quek 36 * @hide exposed on OHOS 37 */ 38 39 public final class UCharacterName 40 { 41 // public data members ---------------------------------------------- 42 43 /* 44 * public singleton instance 45 */ 46 public static final UCharacterName INSTANCE; 47 48 static { 49 try { 50 INSTANCE = new UCharacterName(); 51 } catch (IOException e) { 52 ///CLOVER:OFF 53 throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","",""); 54 ///CLOVER:ON 55 } 56 } 57 58 /** 59 * Number of lines per group 60 * 1 << GROUP_SHIFT_ 61 */ 62 public static final int LINES_PER_GROUP_ = 1 << 5; 63 /** 64 * Maximum number of groups 65 */ 66 public int m_groupcount_ = 0; 67 68 // public methods --------------------------------------------------- 69 70 /** 71 * Retrieve the name of a Unicode code point. 72 * Depending on <code>choice</code>, the character name written into the 73 * buffer is the "modern" name or the name that was defined in Unicode 74 * version 1.0. 75 * The name contains only "invariant" characters 76 * like A-Z, 0-9, space, and '-'. 77 * 78 * @param ch the code point for which to get the name. 79 * @param choice Selector for which name to get. 80 * @return if code point is above 0x1fff, null is returned 81 */ getName(int ch, int choice)82 public String getName(int ch, int choice) 83 { 84 if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || 85 choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) { 86 return null; 87 } 88 89 String result = null; 90 91 result = getAlgName(ch, choice); 92 93 // getting normal character name 94 if (result == null || result.length() == 0) { 95 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 96 result = getExtendedName(ch); 97 } else { 98 result = getGroupName(ch, choice); 99 } 100 } 101 102 return result; 103 } 104 105 /** 106 * Find a character by its name and return its code point value 107 * @param choice selector to indicate if argument name is a Unicode 1.0 108 * or the most current version 109 * @param name the name to search for 110 * @return code point 111 */ getCharFromName(int choice, String name)112 public int getCharFromName(int choice, String name) 113 { 114 // checks for illegal arguments 115 if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || 116 name == null || name.length() == 0) { 117 return -1; 118 } 119 120 // try extended names first 121 int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice); 122 if (result >= -1) { 123 return result; 124 } 125 126 String upperCaseName = name.toUpperCase(Locale.ENGLISH); 127 // try algorithmic names first, if fails then try group names 128 // int result = getAlgorithmChar(choice, uppercasename); 129 130 if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || 131 choice == UCharacterNameChoice.EXTENDED_CHAR_NAME 132 ) { 133 int count = 0; 134 if (m_algorithm_ != null) { 135 count = m_algorithm_.length; 136 } 137 for (count --; count >= 0; count --) { 138 result = m_algorithm_[count].getChar(upperCaseName); 139 if (result >= 0) { 140 return result; 141 } 142 } 143 } 144 145 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 146 result = getGroupChar(upperCaseName, 147 UCharacterNameChoice.UNICODE_CHAR_NAME); 148 if (result == -1) { 149 result = getGroupChar(upperCaseName, 150 UCharacterNameChoice.CHAR_NAME_ALIAS); 151 } 152 } 153 else { 154 result = getGroupChar(upperCaseName, choice); 155 } 156 return result; 157 } 158 159 // these are all UCharacterNameIterator use methods ------------------- 160 161 /** 162 * Reads a block of compressed lengths of 32 strings and expands them into 163 * offsets and lengths for each string. Lengths are stored with a 164 * variable-width encoding in consecutive nibbles: 165 * If a nibble<0xc, then it is the length itself (0 = empty string). 166 * If a nibble>=0xc, then it forms a length value with the following 167 * nibble. 168 * The offsets and lengths arrays must be at least 33 (one more) long 169 * because there is no check here at the end if the last nibble is still 170 * used. 171 * @param index of group string object in array 172 * @param offsets array to store the value of the string offsets 173 * @param lengths array to store the value of the string length 174 * @return next index of the data string immediately after the lengths 175 * in terms of byte address 176 */ getGroupLengths(int index, char offsets[], char lengths[])177 public int getGroupLengths(int index, char offsets[], char lengths[]) 178 { 179 char length = 0xffff; 180 byte b = 0, 181 n = 0; 182 int shift; 183 index = index * m_groupsize_; // byte count offsets of group strings 184 int stringoffset = UCharacterUtility.toInt( 185 m_groupinfo_[index + OFFSET_HIGH_OFFSET_], 186 m_groupinfo_[index + OFFSET_LOW_OFFSET_]); 187 188 offsets[0] = 0; 189 190 // all 32 lengths must be read to get the offset of the first group 191 // string 192 for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) { 193 b = m_groupstring_[stringoffset]; 194 shift = 4; 195 196 while (shift >= 0) { 197 // getting nibble 198 n = (byte)((b >> shift) & 0x0F); 199 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) { 200 length = (char)((n - 12) << 4); 201 } 202 else { 203 if (length != 0xffff) { 204 lengths[i] = (char)((length | n) + 12); 205 } 206 else { 207 lengths[i] = (char)n; 208 } 209 210 if (i < LINES_PER_GROUP_) { 211 offsets[i + 1] = (char)(offsets[i] + lengths[i]); 212 } 213 214 length = 0xffff; 215 i ++; 216 } 217 218 shift -= 4; 219 } 220 } 221 return stringoffset; 222 } 223 224 /** 225 * Gets the name of the argument group index. 226 * UnicodeData.txt uses ';' as a field separator, so no field can contain 227 * ';' as part of its contents. In unames.icu, it is marked as 228 * token[';'] == -1 only if the semicolon is used in the data file - which 229 * is iff we have Unicode 1.0 names or ISO comments or aliases. 230 * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases 231 * although we know that it will never be part of a name. 232 * Equivalent to ICU4C's expandName. 233 * @param index of the group name string in byte count 234 * @param length of the group name string 235 * @param choice of Unicode 1.0 name or the most current name 236 * @return name of the group 237 */ getGroupName(int index, int length, int choice)238 public String getGroupName(int index, int length, int choice) 239 { 240 if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && 241 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME 242 ) { 243 if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) { 244 /* 245 * skip the modern name if it is not requested _and_ 246 * if the semicolon byte value is a character, not a token number 247 */ 248 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; 249 do { 250 int oldindex = index; 251 index += UCharacterUtility.skipByteSubString(m_groupstring_, 252 index, length, (byte)';'); 253 length -= (index - oldindex); 254 } while(--fieldIndex>0); 255 } 256 else { 257 // the semicolon byte is a token number, therefore only modern 258 // names are stored in unames.dat and there is no such 259 // requested alternate name here 260 length = 0; 261 } 262 } 263 264 synchronized (m_utilStringBuffer_) { 265 m_utilStringBuffer_.setLength(0); 266 byte b; 267 char token; 268 for (int i = 0; i < length;) { 269 b = m_groupstring_[index + i]; 270 i ++; 271 272 if (b >= m_tokentable_.length) { 273 if (b == ';') { 274 break; 275 } 276 m_utilStringBuffer_.append(b); // implicit letter 277 } 278 else { 279 token = m_tokentable_[b & 0x00ff]; 280 if (token == 0xFFFE) { 281 // this is a lead byte for a double-byte token 282 token = m_tokentable_[b << 8 | 283 (m_groupstring_[index + i] & 0x00ff)]; 284 i ++; 285 } 286 if (token == 0xFFFF) { 287 if (b == ';') { 288 // skip the semicolon if we are seeking extended 289 // names and there was no 2.0 name but there 290 // is a 1.0 name. 291 if (m_utilStringBuffer_.length() == 0 && choice == 292 UCharacterNameChoice.EXTENDED_CHAR_NAME) { 293 continue; 294 } 295 break; 296 } 297 // explicit letter 298 m_utilStringBuffer_.append((char)(b & 0x00ff)); 299 } 300 else { // write token word 301 UCharacterUtility.getNullTermByteSubString( 302 m_utilStringBuffer_, m_tokenstring_, token); 303 } 304 } 305 } 306 307 if (m_utilStringBuffer_.length() > 0) { 308 return m_utilStringBuffer_.toString(); 309 } 310 } 311 return null; 312 } 313 314 /** 315 * Retrieves the extended name 316 */ getExtendedName(int ch)317 public String getExtendedName(int ch) 318 { 319 String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 320 if (result == null) { 321 // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F. 322 result = getExtendedOr10Name(ch); 323 } 324 return result; 325 } 326 327 /** 328 * Gets the group index for the codepoint, or the group before it. 329 * @param codepoint The codepoint index. 330 * @return group index containing codepoint or the group before it. 331 */ getGroup(int codepoint)332 public int getGroup(int codepoint) 333 { 334 int endGroup = m_groupcount_; 335 int msb = getCodepointMSB(codepoint); 336 int result = 0; 337 // binary search for the group of names that contains the one for 338 // code 339 // find the group that contains codepoint, or the highest before it 340 while (result < endGroup - 1) { 341 int gindex = (result + endGroup) >> 1; 342 if (msb < getGroupMSB(gindex)) { 343 endGroup = gindex; 344 } 345 else { 346 result = gindex; 347 } 348 } 349 return result; 350 } 351 352 /** 353 * Gets the extended and 1.0 name when the most current unicode names 354 * fail 355 * @param ch codepoint 356 * @return name of codepoint extended or 1.0 357 */ getExtendedOr10Name(int ch)358 public String getExtendedOr10Name(int ch) 359 { 360 String result = null; 361 // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F. 362 if (result == null) { 363 int type = getType(ch); 364 // Return unknown if the table of names above is not up to 365 // date. 366 if (type >= TYPE_NAMES_.length) { 367 result = UNKNOWN_TYPE_NAME_; 368 } 369 else { 370 result = TYPE_NAMES_[type]; 371 } 372 synchronized (m_utilStringBuffer_) { 373 m_utilStringBuffer_.setLength(0); 374 m_utilStringBuffer_.append('<'); 375 m_utilStringBuffer_.append(result); 376 m_utilStringBuffer_.append('-'); 377 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); 378 int zeros = 4 - chStr.length(); 379 while (zeros > 0) { 380 m_utilStringBuffer_.append('0'); 381 zeros --; 382 } 383 m_utilStringBuffer_.append(chStr); 384 m_utilStringBuffer_.append('>'); 385 result = m_utilStringBuffer_.toString(); 386 } 387 } 388 return result; 389 } 390 391 /** 392 * Gets the MSB from the group index 393 * @param gindex group index 394 * @return the MSB of the group if gindex is valid, -1 otherwise 395 */ getGroupMSB(int gindex)396 public int getGroupMSB(int gindex) 397 { 398 if (gindex >= m_groupcount_) { 399 return -1; 400 } 401 return m_groupinfo_[gindex * m_groupsize_]; 402 } 403 404 /** 405 * Gets the MSB of the codepoint 406 * @param codepoint The codepoint value. 407 * @return the MSB of the codepoint 408 */ getCodepointMSB(int codepoint)409 public static int getCodepointMSB(int codepoint) 410 { 411 return codepoint >> GROUP_SHIFT_; 412 } 413 414 /** 415 * Gets the maximum codepoint + 1 of the group 416 * @param msb most significant byte of the group 417 * @return limit codepoint of the group 418 */ getGroupLimit(int msb)419 public static int getGroupLimit(int msb) 420 { 421 return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_; 422 } 423 424 /** 425 * Gets the minimum codepoint of the group 426 * @param msb most significant byte of the group 427 * @return minimum codepoint of the group 428 */ getGroupMin(int msb)429 public static int getGroupMin(int msb) 430 { 431 return msb << GROUP_SHIFT_; 432 } 433 434 /** 435 * Gets the offset to a group 436 * @param codepoint The codepoint value. 437 * @return offset to a group 438 */ getGroupOffset(int codepoint)439 public static int getGroupOffset(int codepoint) 440 { 441 return codepoint & GROUP_MASK_; 442 } 443 444 /** 445 * Gets the minimum codepoint of a group 446 * @param codepoint The codepoint value. 447 * @return minimum codepoint in the group which codepoint belongs to 448 */ 449 ///CLOVER:OFF getGroupMinFromCodepoint(int codepoint)450 public static int getGroupMinFromCodepoint(int codepoint) 451 { 452 return codepoint & ~GROUP_MASK_; 453 } 454 ///CLOVER:ON 455 456 /** 457 * Get the Algorithm range length 458 * @return Algorithm range length 459 */ getAlgorithmLength()460 public int getAlgorithmLength() 461 { 462 return m_algorithm_.length; 463 } 464 465 /** 466 * Gets the start of the range 467 * @param index algorithm index 468 * @return algorithm range start 469 */ getAlgorithmStart(int index)470 public int getAlgorithmStart(int index) 471 { 472 return m_algorithm_[index].m_rangestart_; 473 } 474 475 /** 476 * Gets the end of the range 477 * @param index algorithm index 478 * @return algorithm range end 479 */ getAlgorithmEnd(int index)480 public int getAlgorithmEnd(int index) 481 { 482 return m_algorithm_[index].m_rangeend_; 483 } 484 485 /** 486 * Gets the Algorithmic name of the codepoint 487 * @param index algorithmic range index 488 * @param codepoint The codepoint value. 489 * @return algorithmic name of codepoint 490 */ getAlgorithmName(int index, int codepoint)491 public String getAlgorithmName(int index, int codepoint) 492 { 493 String result = null; 494 synchronized (m_utilStringBuffer_) { 495 m_utilStringBuffer_.setLength(0); 496 m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); 497 result = m_utilStringBuffer_.toString(); 498 } 499 return result; 500 } 501 502 /** 503 * Gets the group name of the character 504 * @param ch character to get the group name 505 * @param choice name choice selector to choose a unicode 1.0 or newer name 506 */ getGroupName(int ch, int choice)507 public synchronized String getGroupName(int ch, int choice) 508 { 509 // gets the msb 510 int msb = getCodepointMSB(ch); 511 int group = getGroup(ch); 512 513 // return this if it is an exact match 514 if (msb == m_groupinfo_[group * m_groupsize_]) { 515 int index = getGroupLengths(group, m_groupoffsets_, 516 m_grouplengths_); 517 int offset = ch & GROUP_MASK_; 518 return getGroupName(index + m_groupoffsets_[offset], 519 m_grouplengths_[offset], choice); 520 } 521 522 return null; 523 } 524 525 // these are transliterator use methods --------------------------------- 526 527 /** 528 * Gets the maximum length of any codepoint name. 529 * Equivalent to uprv_getMaxCharNameLength. 530 * @return the maximum length of any codepoint name 531 */ getMaxCharNameLength()532 public int getMaxCharNameLength() 533 { 534 if (initNameSetsLengths()) { 535 return m_maxNameLength_; 536 } 537 else { 538 return 0; 539 } 540 } 541 542 /** 543 * Gets the maximum length of any iso comments. 544 * Equivalent to uprv_getMaxISOCommentLength. 545 * @return the maximum length of any codepoint name 546 */ 547 ///CLOVER:OFF getMaxISOCommentLength()548 public int getMaxISOCommentLength() 549 { 550 if (initNameSetsLengths()) { 551 return m_maxISOCommentLength_; 552 } 553 else { 554 return 0; 555 } 556 } 557 ///CLOVER:ON 558 559 /** 560 * Fills set with characters that are used in Unicode character names. 561 * Equivalent to uprv_getCharNameCharacters. 562 * @param set USet to receive characters. Existing contents are deleted. 563 */ getCharNameCharacters(UnicodeSet set)564 public void getCharNameCharacters(UnicodeSet set) 565 { 566 convert(m_nameSet_, set); 567 } 568 569 /** 570 * Fills set with characters that are used in Unicode character names. 571 * Equivalent to uprv_getISOCommentCharacters. 572 * @param set USet to receive characters. Existing contents are deleted. 573 */ 574 ///CLOVER:OFF getISOCommentCharacters(UnicodeSet set)575 public void getISOCommentCharacters(UnicodeSet set) 576 { 577 convert(m_ISOCommentSet_, set); 578 } 579 ///CLOVER:ON 580 581 // package private inner class -------------------------------------- 582 583 /** 584 * Algorithmic name class 585 */ 586 static final class AlgorithmName 587 { 588 // package private data members ---------------------------------- 589 590 /** 591 * Constant type value of the different AlgorithmName 592 */ 593 static final int TYPE_0_ = 0; 594 static final int TYPE_1_ = 1; 595 596 // package private constructors ---------------------------------- 597 598 /** 599 * Constructor 600 */ AlgorithmName()601 AlgorithmName() 602 { 603 } 604 605 // package private methods --------------------------------------- 606 607 /** 608 * Sets the information for accessing the algorithmic names 609 * @param rangestart starting code point that lies within this name group 610 * @param rangeend end code point that lies within this name group 611 * @param type algorithm type. There's 2 kinds of algorithmic type. First 612 * which uses code point as part of its name and the other uses 613 * variant postfix strings 614 * @param variant algorithmic variant 615 * @return true if values are valid 616 */ setInfo(int rangestart, int rangeend, byte type, byte variant)617 boolean setInfo(int rangestart, int rangeend, byte type, byte variant) 618 { 619 if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend 620 && rangeend <= UCharacter.MAX_VALUE && 621 (type == TYPE_0_ || type == TYPE_1_)) { 622 m_rangestart_ = rangestart; 623 m_rangeend_ = rangeend; 624 m_type_ = type; 625 m_variant_ = variant; 626 return true; 627 } 628 return false; 629 } 630 631 /** 632 * Sets the factor data 633 * @param factor Array of factor 634 * @return true if factors are valid 635 */ setFactor(char factor[])636 boolean setFactor(char factor[]) 637 { 638 if (factor.length == m_variant_) { 639 m_factor_ = factor; 640 return true; 641 } 642 return false; 643 } 644 645 /** 646 * Sets the name prefix 647 * @param prefix 648 * @return true if prefix is set 649 */ setPrefix(String prefix)650 boolean setPrefix(String prefix) 651 { 652 if (prefix != null && prefix.length() > 0) { 653 m_prefix_ = prefix; 654 return true; 655 } 656 return false; 657 } 658 659 /** 660 * Sets the variant factorized name data 661 * @param string variant factorized name data 662 * @return true if values are set 663 */ setFactorString(byte string[])664 boolean setFactorString(byte string[]) 665 { 666 // factor and variant string can be empty for things like 667 // hanggul code points 668 m_factorstring_ = string; 669 return true; 670 } 671 672 /** 673 * Checks if code point lies in Algorithm object at index 674 * @param ch code point 675 */ contains(int ch)676 boolean contains(int ch) 677 { 678 return m_rangestart_ <= ch && ch <= m_rangeend_; 679 } 680 681 /** 682 * Appends algorithm name of code point into StringBuffer. 683 * Note this method does not check for validity of code point in Algorithm, 684 * result is undefined if code point does not belong in Algorithm. 685 * @param ch code point 686 * @param str StringBuffer to append to 687 */ appendName(int ch, StringBuffer str)688 void appendName(int ch, StringBuffer str) 689 { 690 str.append(m_prefix_); 691 switch (m_type_) 692 { 693 case TYPE_0_: 694 // prefix followed by hex digits indicating variants 695 str.append(Utility.hex(ch,m_variant_)); 696 break; 697 case TYPE_1_: 698 // prefix followed by factorized-elements 699 int offset = ch - m_rangestart_; 700 int indexes[] = m_utilIntBuffer_; 701 int factor; 702 703 // write elements according to the factors 704 // the factorized elements are determined by modulo 705 // arithmetic 706 synchronized (m_utilIntBuffer_) { 707 for (int i = m_variant_ - 1; i > 0; i --) 708 { 709 factor = m_factor_[i] & 0x00FF; 710 indexes[i] = offset % factor; 711 offset /= factor; 712 } 713 714 // we don't need to calculate the last modulus because 715 // start <= code <= end guarantees here that 716 // code <= factors[0] 717 indexes[0] = offset; 718 719 // joining up the factorized strings 720 str.append(getFactorString(indexes, m_variant_)); 721 } 722 break; 723 } 724 } 725 726 /** 727 * Gets the character for the argument algorithmic name 728 * @return the algorithmic char or -1 otherwise. 729 */ getChar(String name)730 int getChar(String name) 731 { 732 int prefixlen = m_prefix_.length(); 733 if (name.length() < prefixlen || 734 !m_prefix_.equals(name.substring(0, prefixlen))) { 735 return -1; 736 } 737 738 switch (m_type_) 739 { 740 case TYPE_0_ : 741 try 742 { 743 int result = Integer.parseInt(name.substring(prefixlen), 744 16); 745 // does it fit into the range? 746 if (m_rangestart_ <= result && result <= m_rangeend_) { 747 return result; 748 } 749 } 750 catch (NumberFormatException e) 751 { 752 return -1; 753 } 754 break; 755 case TYPE_1_ : 756 // repetitative suffix name comparison done here 757 // offset is the character code - start 758 for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++) 759 { 760 int offset = ch - m_rangestart_; 761 int indexes[] = m_utilIntBuffer_; 762 int factor; 763 764 // write elements according to the factors 765 // the factorized elements are determined by modulo 766 // arithmetic 767 synchronized (m_utilIntBuffer_) { 768 for (int i = m_variant_ - 1; i > 0; i --) 769 { 770 factor = m_factor_[i] & 0x00FF; 771 indexes[i] = offset % factor; 772 offset /= factor; 773 } 774 775 // we don't need to calculate the last modulus 776 // because start <= code <= end guarantees here that 777 // code <= factors[0] 778 indexes[0] = offset; 779 780 // joining up the factorized strings 781 if (compareFactorString(indexes, m_variant_, name, 782 prefixlen)) { 783 return ch; 784 } 785 } 786 } 787 } 788 789 return -1; 790 } 791 792 /** 793 * Adds all chars in the set of algorithmic names into the set. 794 * Equivalent to part of calcAlgNameSetsLengths. 795 * @param set int set to add the chars of the algorithm names into 796 * @param maxlength maximum length to compare to 797 * @return the length that is either maxlength of the length of this 798 * algorithm name if it is longer than maxlength 799 */ add(int set[], int maxlength)800 int add(int set[], int maxlength) 801 { 802 // prefix length 803 int length = UCharacterName.add(set, m_prefix_); 804 switch (m_type_) { 805 case TYPE_0_ : { 806 // name = prefix + (range->variant times) hex-digits 807 // prefix 808 length += m_variant_; 809 /* synwee to check 810 * addString(set, (const char *)(range + 1)) 811 + range->variant;*/ 812 break; 813 } 814 case TYPE_1_ : { 815 // name = prefix factorized-elements 816 // get the set and maximum factor suffix length for each 817 // factor 818 for (int i = m_variant_ - 1; i > 0; i --) 819 { 820 int maxfactorlength = 0; 821 int count = 0; 822 for (int factor = m_factor_[i]; factor > 0; -- factor) { 823 synchronized (m_utilStringBuffer_) { 824 m_utilStringBuffer_.setLength(0); 825 count 826 = UCharacterUtility.getNullTermByteSubString( 827 m_utilStringBuffer_, 828 m_factorstring_, count); 829 UCharacterName.add(set, m_utilStringBuffer_); 830 if (m_utilStringBuffer_.length() 831 > maxfactorlength) 832 { 833 maxfactorlength 834 = m_utilStringBuffer_.length(); 835 } 836 } 837 } 838 length += maxfactorlength; 839 } 840 } 841 } 842 if (length > maxlength) { 843 return length; 844 } 845 return maxlength; 846 } 847 848 // private data members ------------------------------------------ 849 850 /** 851 * Algorithmic data information 852 */ 853 private int m_rangestart_; 854 private int m_rangeend_; 855 private byte m_type_; 856 private byte m_variant_; 857 private char m_factor_[]; 858 private String m_prefix_; 859 private byte m_factorstring_[]; 860 /** 861 * Utility StringBuffer 862 */ 863 private StringBuffer m_utilStringBuffer_ = new StringBuffer(); 864 /** 865 * Utility int buffer 866 */ 867 private int m_utilIntBuffer_[] = new int[256]; 868 869 // private methods ----------------------------------------------- 870 871 /** 872 * Gets the indexth string in each of the argument factor block 873 * @param index array with each index corresponding to each factor block 874 * @param length length of the array index 875 * @return the combined string of the array of indexth factor string in 876 * factor block 877 */ getFactorString(int index[], int length)878 private String getFactorString(int index[], int length) 879 { 880 int size = m_factor_.length; 881 if (index == null || length != size) { 882 return null; 883 } 884 885 synchronized (m_utilStringBuffer_) { 886 m_utilStringBuffer_.setLength(0); 887 int count = 0; 888 int factor; 889 size --; 890 for (int i = 0; i <= size; i ++) { 891 factor = m_factor_[i]; 892 count = UCharacterUtility.skipNullTermByteSubString( 893 m_factorstring_, count, index[i]); 894 count = UCharacterUtility.getNullTermByteSubString( 895 m_utilStringBuffer_, m_factorstring_, 896 count); 897 if (i != size) { 898 count = UCharacterUtility.skipNullTermByteSubString( 899 m_factorstring_, count, 900 factor - index[i] - 1); 901 } 902 } 903 return m_utilStringBuffer_.toString(); 904 } 905 } 906 907 /** 908 * Compares the indexth string in each of the argument factor block with 909 * the argument string 910 * @param index array with each index corresponding to each factor block 911 * @param length index array length 912 * @param str string to compare with 913 * @param offset of str to start comparison 914 * @return true if string matches 915 */ compareFactorString(int index[], int length, String str, int offset)916 private boolean compareFactorString(int index[], int length, String str, 917 int offset) 918 { 919 int size = m_factor_.length; 920 if (index == null || length != size) 921 return false; 922 923 int count = 0; 924 int strcount = offset; 925 int factor; 926 size --; 927 for (int i = 0; i <= size; i ++) 928 { 929 factor = m_factor_[i]; 930 count = UCharacterUtility.skipNullTermByteSubString( 931 m_factorstring_, count, index[i]); 932 strcount = UCharacterUtility.compareNullTermByteSubString(str, 933 m_factorstring_, strcount, count); 934 if (strcount < 0) { 935 return false; 936 } 937 938 if (i != size) { 939 count = UCharacterUtility.skipNullTermByteSubString( 940 m_factorstring_, count, factor - index[i]); 941 } 942 } 943 if (strcount != str.length()) { 944 return false; 945 } 946 return true; 947 } 948 } 949 950 // package private data members -------------------------------------- 951 952 /** 953 * Size of each groups 954 */ 955 int m_groupsize_ = 0; 956 957 // package private methods -------------------------------------------- 958 959 /** 960 * Sets the token data 961 * @param token array of tokens 962 * @param tokenstring array of string values of the tokens 963 * @return false if there is a data error 964 */ setToken(char token[], byte tokenstring[])965 boolean setToken(char token[], byte tokenstring[]) 966 { 967 if (token != null && tokenstring != null && token.length > 0 && 968 tokenstring.length > 0) { 969 m_tokentable_ = token; 970 m_tokenstring_ = tokenstring; 971 return true; 972 } 973 return false; 974 } 975 976 /** 977 * Set the algorithm name information array 978 * @param alg Algorithm information array 979 * @return true if the group string offset has been set correctly 980 */ setAlgorithm(AlgorithmName alg[])981 boolean setAlgorithm(AlgorithmName alg[]) 982 { 983 if (alg != null && alg.length != 0) { 984 m_algorithm_ = alg; 985 return true; 986 } 987 return false; 988 } 989 990 /** 991 * Sets the number of group and size of each group in number of char 992 * @param count number of groups 993 * @param size size of group in char 994 * @return true if group size is set correctly 995 */ setGroupCountSize(int count, int size)996 boolean setGroupCountSize(int count, int size) 997 { 998 if (count <= 0 || size <= 0) { 999 return false; 1000 } 1001 m_groupcount_ = count; 1002 m_groupsize_ = size; 1003 return true; 1004 } 1005 1006 /** 1007 * Sets the group name data 1008 * @param group index information array 1009 * @param groupstring name information array 1010 * @return false if there is a data error 1011 */ setGroup(char group[], byte groupstring[])1012 boolean setGroup(char group[], byte groupstring[]) 1013 { 1014 if (group != null && groupstring != null && group.length > 0 && 1015 groupstring.length > 0) { 1016 m_groupinfo_ = group; 1017 m_groupstring_ = groupstring; 1018 return true; 1019 } 1020 return false; 1021 } 1022 1023 // private data members ---------------------------------------------- 1024 1025 /** 1026 * Data used in unames.icu 1027 */ 1028 private char m_tokentable_[]; 1029 private byte m_tokenstring_[]; 1030 private char m_groupinfo_[]; 1031 private byte m_groupstring_[]; 1032 private AlgorithmName m_algorithm_[]; 1033 1034 /** 1035 * Group use. Note - access must be synchronized. 1036 */ 1037 private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1]; 1038 private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1]; 1039 1040 /** 1041 * Default name of the name datafile 1042 */ 1043 private static final String FILE_NAME_ = "unames.icu"; 1044 /** 1045 * Shift count to retrieve group information 1046 */ 1047 private static final int GROUP_SHIFT_ = 5; 1048 /** 1049 * Mask to retrieve the offset for a particular character within a group 1050 */ 1051 private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1; 1052 1053 /** 1054 * Position of offsethigh in group information array 1055 */ 1056 private static final int OFFSET_HIGH_OFFSET_ = 1; 1057 1058 /** 1059 * Position of offsetlow in group information array 1060 */ 1061 private static final int OFFSET_LOW_OFFSET_ = 2; 1062 /** 1063 * Double nibble indicator, any nibble > this number has to be combined 1064 * with its following nibble 1065 */ 1066 private static final int SINGLE_NIBBLE_MAX_ = 11; 1067 1068 /* 1069 * Maximum length of character names (regular & 1.0). 1070 */ 1071 //private static int MAX_NAME_LENGTH_ = 0; 1072 /* 1073 * Maximum length of ISO comments. 1074 */ 1075 //private static int MAX_ISO_COMMENT_LENGTH_ = 0; 1076 1077 /** 1078 * Set of chars used in character names (regular & 1.0). 1079 * Chars are platform-dependent (can be EBCDIC). 1080 */ 1081 private int m_nameSet_[] = new int[8]; 1082 /** 1083 * Set of chars used in ISO comments. (regular & 1.0). 1084 * Chars are platform-dependent (can be EBCDIC). 1085 */ 1086 private int m_ISOCommentSet_[] = new int[8]; 1087 /** 1088 * Utility StringBuffer 1089 */ 1090 private StringBuffer m_utilStringBuffer_ = new StringBuffer(); 1091 /** 1092 * Utility int buffer 1093 */ 1094 private int m_utilIntBuffer_[] = new int[2]; 1095 /** 1096 * Maximum ISO comment length 1097 */ 1098 private int m_maxISOCommentLength_; 1099 /** 1100 * Maximum name length 1101 */ 1102 private int m_maxNameLength_; 1103 /** 1104 * Type names used for extended names 1105 */ 1106 private static final String TYPE_NAMES_[] = {"unassigned", 1107 "uppercase letter", 1108 "lowercase letter", 1109 "titlecase letter", 1110 "modifier letter", 1111 "other letter", 1112 "non spacing mark", 1113 "enclosing mark", 1114 "combining spacing mark", 1115 "decimal digit number", 1116 "letter number", 1117 "other number", 1118 "space separator", 1119 "line separator", 1120 "paragraph separator", 1121 "control", 1122 "format", 1123 "private use area", 1124 "surrogate", 1125 "dash punctuation", 1126 "start punctuation", 1127 "end punctuation", 1128 "connector punctuation", 1129 "other punctuation", 1130 "math symbol", 1131 "currency symbol", 1132 "modifier symbol", 1133 "other symbol", 1134 "initial punctuation", 1135 "final punctuation", 1136 "noncharacter", 1137 "lead surrogate", 1138 "trail surrogate"}; 1139 /** 1140 * Unknown type name 1141 */ 1142 private static final String UNKNOWN_TYPE_NAME_ = "unknown"; 1143 /** 1144 * Not a character type 1145 */ 1146 private static final int NON_CHARACTER_ 1147 = UCharacterCategory.CHAR_CATEGORY_COUNT; 1148 /** 1149 * Lead surrogate type 1150 */ 1151 private static final int LEAD_SURROGATE_ 1152 = UCharacterCategory.CHAR_CATEGORY_COUNT + 1; 1153 /** 1154 * Trail surrogate type 1155 */ 1156 private static final int TRAIL_SURROGATE_ 1157 = UCharacterCategory.CHAR_CATEGORY_COUNT + 2; 1158 /** 1159 * Extended category count 1160 */ 1161 static final int EXTENDED_CATEGORY_ 1162 = UCharacterCategory.CHAR_CATEGORY_COUNT + 3; 1163 1164 // private constructor ------------------------------------------------ 1165 1166 /** 1167 * <p>Protected constructor for use in UCharacter.</p> 1168 * @exception IOException thrown when data reading fails 1169 */ UCharacterName()1170 private UCharacterName() throws IOException 1171 { 1172 ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_); 1173 UCharacterNameReader reader = new UCharacterNameReader(b); 1174 reader.read(this); 1175 } 1176 1177 // private methods --------------------------------------------------- 1178 1179 /** 1180 * Gets the algorithmic name for the argument character 1181 * @param ch character to determine name for 1182 * @param choice name choice 1183 * @return the algorithmic name or null if not found 1184 */ getAlgName(int ch, int choice)1185 private String getAlgName(int ch, int choice) 1186 { 1187 /* Only the normative character name can be algorithmic. */ 1188 if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || 1189 choice == UCharacterNameChoice.EXTENDED_CHAR_NAME 1190 ) { 1191 // index in terms integer index 1192 synchronized (m_utilStringBuffer_) { 1193 m_utilStringBuffer_.setLength(0); 1194 1195 for (int index = m_algorithm_.length - 1; index >= 0; index --) 1196 { 1197 if (m_algorithm_[index].contains(ch)) { 1198 m_algorithm_[index].appendName(ch, m_utilStringBuffer_); 1199 return m_utilStringBuffer_.toString(); 1200 } 1201 } 1202 } 1203 } 1204 return null; 1205 } 1206 1207 /** 1208 * Getting the character with the tokenized argument name 1209 * @param name of the character 1210 * @return character with the tokenized argument name or -1 if character 1211 * is not found 1212 */ getGroupChar(String name, int choice)1213 private synchronized int getGroupChar(String name, int choice) 1214 { 1215 for (int i = 0; i < m_groupcount_; i ++) { 1216 // populating the data set of grouptable 1217 1218 int startgpstrindex = getGroupLengths(i, m_groupoffsets_, 1219 m_grouplengths_); 1220 1221 // shift out to function 1222 int result = getGroupChar(startgpstrindex, m_grouplengths_, name, 1223 choice); 1224 if (result != -1) { 1225 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_) 1226 | result; 1227 } 1228 } 1229 return -1; 1230 } 1231 1232 /** 1233 * Compares and retrieve character if name is found within the argument 1234 * group 1235 * @param index index where the set of names reside in the group block 1236 * @param length list of lengths of the strings 1237 * @param name character name to search for 1238 * @param choice of either 1.0 or the most current unicode name 1239 * @return relative character in the group which matches name, otherwise if 1240 * not found, -1 will be returned 1241 */ getGroupChar(int index, char length[], String name, int choice)1242 private int getGroupChar(int index, char length[], String name, 1243 int choice) 1244 { 1245 byte b = 0; 1246 char token; 1247 int len; 1248 int namelen = name.length(); 1249 int nindex; 1250 int count; 1251 1252 for (int result = 0; result <= LINES_PER_GROUP_; result ++) { 1253 nindex = 0; 1254 len = length[result]; 1255 1256 if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && 1257 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME 1258 ) { 1259 /* 1260 * skip the modern name if it is not requested _and_ 1261 * if the semicolon byte value is a character, not a token number 1262 */ 1263 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; 1264 do { 1265 int oldindex = index; 1266 index += UCharacterUtility.skipByteSubString(m_groupstring_, 1267 index, len, (byte)';'); 1268 len -= (index - oldindex); 1269 } while(--fieldIndex>0); 1270 } 1271 1272 // number of tokens is > the length of the name 1273 // write each letter directly, and write a token word per token 1274 for (count = 0; count < len && nindex != -1 && nindex < namelen; 1275 ) { 1276 b = m_groupstring_[index + count]; 1277 count ++; 1278 1279 if (b >= m_tokentable_.length) { 1280 if (name.charAt(nindex ++) != (b & 0xFF)) { 1281 nindex = -1; 1282 } 1283 } 1284 else { 1285 token = m_tokentable_[b & 0xFF]; 1286 if (token == 0xFFFE) { 1287 // this is a lead byte for a double-byte token 1288 token = m_tokentable_[b << 8 | 1289 (m_groupstring_[index + count] & 0x00ff)]; 1290 count ++; 1291 } 1292 if (token == 0xFFFF) { 1293 if (name.charAt(nindex ++) != (b & 0xFF)) { 1294 nindex = -1; 1295 } 1296 } 1297 else { 1298 // compare token with name 1299 nindex = UCharacterUtility.compareNullTermByteSubString( 1300 name, m_tokenstring_, nindex, token); 1301 } 1302 } 1303 } 1304 1305 if (namelen == nindex && 1306 (count == len || m_groupstring_[index + count] == ';')) { 1307 return result; 1308 } 1309 1310 index += len; 1311 } 1312 return -1; 1313 } 1314 1315 /** 1316 * Gets the character extended type 1317 * @param ch character to be tested 1318 * @return extended type it is associated with 1319 */ getType(int ch)1320 private static int getType(int ch) 1321 { 1322 if (UCharacterUtility.isNonCharacter(ch)) { 1323 // not a character we return a invalid category count 1324 return NON_CHARACTER_; 1325 } 1326 int result = UCharacter.getType(ch); 1327 if (result == UCharacterCategory.SURROGATE) { 1328 if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { 1329 result = LEAD_SURROGATE_; 1330 } 1331 else { 1332 result = TRAIL_SURROGATE_; 1333 } 1334 } 1335 return result; 1336 } 1337 1338 /** 1339 * Getting the character with extended name of the form <....>. 1340 * @param name of the character to be found 1341 * @param choice name choice 1342 * @return character associated with the name, -1 if such character is not 1343 * found and -2 if we should continue with the search. 1344 */ getExtendedChar(String name, int choice)1345 private static int getExtendedChar(String name, int choice) 1346 { 1347 if (name.charAt(0) == '<') { 1348 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 1349 int endIndex = name.length() - 1; 1350 if (name.charAt(endIndex) == '>') { 1351 int startIndex = name.lastIndexOf('-'); 1352 if (startIndex >= 0) { // We've got a category. 1353 startIndex ++; 1354 1355 // There should be 1 to 8 hex digits. 1356 int hexLength = endIndex - startIndex; 1357 if (hexLength < 1 || 8 < hexLength) { 1358 return -1; 1359 } 1360 int result = -1; 1361 try { 1362 result = Integer.parseInt( 1363 name.substring(startIndex, endIndex), 1364 16); 1365 } 1366 catch (NumberFormatException e) { 1367 return -1; 1368 } 1369 if (result < 0 || 0x10ffff < result) { 1370 return -1; 1371 } 1372 // Now validate the category name. We could use a 1373 // binary search, or a trie, if we really wanted to. 1374 int charType = getType(result); 1375 String type = name.substring(1, startIndex - 1); 1376 int length = TYPE_NAMES_.length; 1377 for (int i = 0; i < length; ++ i) { 1378 if (type.compareTo(TYPE_NAMES_[i]) == 0) { 1379 if (charType == i) { 1380 return result; 1381 } 1382 break; 1383 } 1384 } 1385 } 1386 } 1387 } 1388 return -1; 1389 } 1390 return -2; 1391 } 1392 1393 // sets of name characters, maximum name lengths ----------------------- 1394 1395 /** 1396 * Adds a codepoint into a set of ints. 1397 * Equivalent to SET_ADD. 1398 * @param set set to add to 1399 * @param ch 16 bit char to add 1400 */ add(int set[], char ch)1401 private static void add(int set[], char ch) 1402 { 1403 set[ch >>> 5] |= 1 << (ch & 0x1f); 1404 } 1405 1406 /** 1407 * Checks if a codepoint is a part of a set of ints. 1408 * Equivalent to SET_CONTAINS. 1409 * @param set set to check in 1410 * @param ch 16 bit char to check 1411 * @return true if codepoint is part of the set, false otherwise 1412 */ contains(int set[], char ch)1413 private static boolean contains(int set[], char ch) 1414 { 1415 return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0; 1416 } 1417 1418 /** 1419 * Adds all characters of the argument str and gets the length 1420 * Equivalent to calcStringSetLength. 1421 * @param set set to add all chars of str to 1422 * @param str string to add 1423 */ add(int set[], String str)1424 private static int add(int set[], String str) 1425 { 1426 int result = str.length(); 1427 1428 for (int i = result - 1; i >= 0; i --) { 1429 add(set, str.charAt(i)); 1430 } 1431 return result; 1432 } 1433 1434 /** 1435 * Adds all characters of the argument str and gets the length 1436 * Equivalent to calcStringSetLength. 1437 * @param set set to add all chars of str to 1438 * @param str string to add 1439 */ add(int set[], StringBuffer str)1440 private static int add(int set[], StringBuffer str) 1441 { 1442 int result = str.length(); 1443 1444 for (int i = result - 1; i >= 0; i --) { 1445 add(set, str.charAt(i)); 1446 } 1447 return result; 1448 } 1449 1450 /** 1451 * Adds all algorithmic names into the name set. 1452 * Equivalent to part of calcAlgNameSetsLengths. 1453 * @param maxlength length to compare to 1454 * @return the maximum length of any possible algorithmic name if it is > 1455 * maxlength, otherwise maxlength is returned. 1456 */ addAlgorithmName(int maxlength)1457 private int addAlgorithmName(int maxlength) 1458 { 1459 int result = 0; 1460 for (int i = m_algorithm_.length - 1; i >= 0; i --) { 1461 result = m_algorithm_[i].add(m_nameSet_, maxlength); 1462 if (result > maxlength) { 1463 maxlength = result; 1464 } 1465 } 1466 return maxlength; 1467 } 1468 1469 /** 1470 * Adds all extended names into the name set. 1471 * Equivalent to part of calcExtNameSetsLengths. 1472 * @param maxlength length to compare to 1473 * @return the maxlength of any possible extended name. 1474 */ addExtendedName(int maxlength)1475 private int addExtendedName(int maxlength) 1476 { 1477 for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) { 1478 // for each category, count the length of the category name 1479 // plus 9 = 1480 // 2 for <> 1481 // 1 for - 1482 // 6 for most hex digits per code point 1483 int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]); 1484 if (length > maxlength) { 1485 maxlength = length; 1486 } 1487 } 1488 return maxlength; 1489 } 1490 1491 /** 1492 * Adds names of a group to the argument set. 1493 * Equivalent to calcNameSetLength. 1494 * @param offset of the group name string in byte count 1495 * @param length of the group name string 1496 * @param tokenlength array to store the length of each token 1497 * @param set to add to 1498 * @return the length of the name string and the length of the group 1499 * string parsed 1500 */ addGroupName(int offset, int length, byte tokenlength[], int set[])1501 private int[] addGroupName(int offset, int length, byte tokenlength[], 1502 int set[]) 1503 { 1504 int resultnlength = 0; 1505 int resultplength = 0; 1506 while (resultplength < length) { 1507 char b = (char)(m_groupstring_[offset + resultplength] & 0xff); 1508 resultplength ++; 1509 if (b == ';') { 1510 break; 1511 } 1512 1513 if (b >= m_tokentable_.length) { 1514 add(set, b); // implicit letter 1515 resultnlength ++; 1516 } 1517 else { 1518 char token = m_tokentable_[b & 0x00ff]; 1519 if (token == 0xFFFE) { 1520 // this is a lead byte for a double-byte token 1521 b = (char)(b << 8 | (m_groupstring_[offset + resultplength] 1522 & 0x00ff)); 1523 token = m_tokentable_[b]; 1524 resultplength ++; 1525 } 1526 if (token == 0xFFFF) { 1527 add(set, b); 1528 resultnlength ++; 1529 } 1530 else { 1531 // count token word 1532 // use cached token length 1533 byte tlength = tokenlength[b]; 1534 if (tlength == 0) { 1535 synchronized (m_utilStringBuffer_) { 1536 m_utilStringBuffer_.setLength(0); 1537 UCharacterUtility.getNullTermByteSubString( 1538 m_utilStringBuffer_, m_tokenstring_, 1539 token); 1540 tlength = (byte)add(set, m_utilStringBuffer_); 1541 } 1542 tokenlength[b] = tlength; 1543 } 1544 resultnlength += tlength; 1545 } 1546 } 1547 } 1548 m_utilIntBuffer_[0] = resultnlength; 1549 m_utilIntBuffer_[1] = resultplength; 1550 return m_utilIntBuffer_; 1551 } 1552 1553 /** 1554 * Adds names of all group to the argument set. 1555 * Sets the data member m_max*Length_. 1556 * Method called only once. 1557 * Equivalent to calcGroupNameSetsLength. 1558 * @param maxlength length to compare to 1559 */ addGroupName(int maxlength)1560 private void addGroupName(int maxlength) 1561 { 1562 int maxisolength = 0; 1563 char offsets[] = new char[LINES_PER_GROUP_ + 2]; 1564 char lengths[] = new char[LINES_PER_GROUP_ + 2]; 1565 byte tokenlengths[] = new byte[m_tokentable_.length]; 1566 1567 // enumerate all groups 1568 // for (int i = m_groupcount_ - 1; i >= 0; i --) { 1569 for (int i = 0; i < m_groupcount_ ; i ++) { 1570 int offset = getGroupLengths(i, offsets, lengths); 1571 // enumerate all lines in each group 1572 // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0; 1573 // linenumber --) { 1574 for (int linenumber = 0; linenumber < LINES_PER_GROUP_; 1575 linenumber ++) { 1576 int lineoffset = offset + offsets[linenumber]; 1577 int length = lengths[linenumber]; 1578 if (length == 0) { 1579 continue; 1580 } 1581 1582 // read regular name 1583 int parsed[] = addGroupName(lineoffset, length, tokenlengths, 1584 m_nameSet_); 1585 if (parsed[0] > maxlength) { 1586 // 0 for name length 1587 maxlength = parsed[0]; 1588 } 1589 lineoffset += parsed[1]; 1590 if (parsed[1] >= length) { 1591 // 1 for parsed group string length 1592 continue; 1593 } 1594 length -= parsed[1]; 1595 // read Unicode 1.0 name 1596 parsed = addGroupName(lineoffset, length, tokenlengths, 1597 m_nameSet_); 1598 if (parsed[0] > maxlength) { 1599 // 0 for name length 1600 maxlength = parsed[0]; 1601 } 1602 lineoffset += parsed[1]; 1603 if (parsed[1] >= length) { 1604 // 1 for parsed group string length 1605 continue; 1606 } 1607 length -= parsed[1]; 1608 // read ISO comment 1609 parsed = addGroupName(lineoffset, length, tokenlengths, 1610 m_ISOCommentSet_); 1611 if (parsed[1] > maxisolength) { 1612 maxisolength = length; 1613 } 1614 } 1615 } 1616 1617 // set gMax... - name length last for threading 1618 m_maxISOCommentLength_ = maxisolength; 1619 m_maxNameLength_ = maxlength; 1620 } 1621 1622 /** 1623 * Sets up the name sets and the calculation of the maximum lengths. 1624 * Equivalent to calcNameSetsLengths. 1625 */ initNameSetsLengths()1626 private boolean initNameSetsLengths() 1627 { 1628 if (m_maxNameLength_ > 0) { 1629 return true; 1630 } 1631 1632 String extra = "0123456789ABCDEF<>-"; 1633 // set hex digits, used in various names, and <>-, used in extended 1634 // names 1635 for (int i = extra.length() - 1; i >= 0; i --) { 1636 add(m_nameSet_, extra.charAt(i)); 1637 } 1638 1639 // set sets and lengths from algorithmic names 1640 m_maxNameLength_ = addAlgorithmName(0); 1641 // set sets and lengths from extended names 1642 m_maxNameLength_ = addExtendedName(m_maxNameLength_); 1643 // set sets and lengths from group names, set global maximum values 1644 addGroupName(m_maxNameLength_); 1645 return true; 1646 } 1647 1648 /** 1649 * Converts the char set cset into a Unicode set uset. 1650 * Equivalent to charSetToUSet. 1651 * @param set Set of 256 bit flags corresponding to a set of chars. 1652 * @param uset USet to receive characters. Existing contents are deleted. 1653 */ convert(int set[], UnicodeSet uset)1654 private void convert(int set[], UnicodeSet uset) 1655 { 1656 uset.clear(); 1657 if (!initNameSetsLengths()) { 1658 return; 1659 } 1660 1661 // build a char string with all chars that are used in character names 1662 for (char c = 255; c > 0; c --) { 1663 if (contains(set, c)) { 1664 uset.add(c); 1665 } 1666 } 1667 } 1668 } 1669