1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 /* 3 ******************************************************************************* 4 * Copyright (C) 1996-2014, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8 9 package android.icu.impl; 10 11 import java.io.IOException; 12 import java.nio.ByteBuffer; 13 import java.util.Locale; 14 import java.util.MissingResourceException; 15 16 import android.icu.lang.UCharacter; 17 import android.icu.lang.UCharacterCategory; 18 import android.icu.text.UTF16; 19 import android.icu.text.UnicodeSet; 20 21 /** 22 * Internal class to manage character names. 23 * Since data for names are stored 24 * in an array of char, by default indexes used in this class is refering to 25 * a 2 byte count, unless otherwise stated. Cases where the index is refering 26 * to a byte count, the index is halved and depending on whether the index is 27 * even or odd, the MSB or LSB of the result char at the halved index is 28 * returned. For indexes to an array of int, the index is multiplied by 2, 29 * result char at the multiplied index and its following char is returned as an 30 * int. 31 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class 32 * Note : 0 - 0x1F are control characters without names in Unicode 3.0 33 * @author Syn Wee Quek 34 * @hide Only a subset of ICU is exposed in Android 35 */ 36 37 public final class UCharacterName 38 { 39 // public data members ---------------------------------------------- 40 41 /* 42 * public singleton instance 43 */ 44 public static final UCharacterName INSTANCE; 45 46 static { 47 try { 48 INSTANCE = new UCharacterName(); 49 } catch (IOException e) { 50 ///CLOVER:OFF 51 throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","",""); 52 ///CLOVER:ON 53 } 54 } 55 56 /** 57 * Number of lines per group 58 * 1 << GROUP_SHIFT_ 59 */ 60 public static final int LINES_PER_GROUP_ = 1 << 5; 61 /** 62 * Maximum number of groups 63 */ 64 public int m_groupcount_ = 0; 65 66 // public methods --------------------------------------------------- 67 68 /** 69 * Retrieve the name of a Unicode code point. 70 * Depending on <code>choice</code>, the character name written into the 71 * buffer is the "modern" name or the name that was defined in Unicode 72 * version 1.0. 73 * The name contains only "invariant" characters 74 * like A-Z, 0-9, space, and '-'. 75 * 76 * @param ch the code point for which to get the name. 77 * @param choice Selector for which name to get. 78 * @return if code point is above 0x1fff, null is returned 79 */ getName(int ch, int choice)80 public String getName(int ch, int choice) 81 { 82 if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || 83 choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) { 84 return null; 85 } 86 87 String result = null; 88 89 result = getAlgName(ch, choice); 90 91 // getting normal character name 92 if (result == null || result.length() == 0) { 93 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 94 result = getExtendedName(ch); 95 } else { 96 result = getGroupName(ch, choice); 97 } 98 } 99 100 return result; 101 } 102 103 /** 104 * Find a character by its name and return its code point value 105 * @param choice selector to indicate if argument name is a Unicode 1.0 106 * or the most current version 107 * @param name the name to search for 108 * @return code point 109 */ getCharFromName(int choice, String name)110 public int getCharFromName(int choice, String name) 111 { 112 // checks for illegal arguments 113 if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || 114 name == null || name.length() == 0) { 115 return -1; 116 } 117 118 // try extended names first 119 int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice); 120 if (result >= -1) { 121 return result; 122 } 123 124 String upperCaseName = name.toUpperCase(Locale.ENGLISH); 125 // try algorithmic names first, if fails then try group names 126 // int result = getAlgorithmChar(choice, uppercasename); 127 128 if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || 129 choice == UCharacterNameChoice.EXTENDED_CHAR_NAME 130 ) { 131 int count = 0; 132 if (m_algorithm_ != null) { 133 count = m_algorithm_.length; 134 } 135 for (count --; count >= 0; count --) { 136 result = m_algorithm_[count].getChar(upperCaseName); 137 if (result >= 0) { 138 return result; 139 } 140 } 141 } 142 143 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 144 result = getGroupChar(upperCaseName, 145 UCharacterNameChoice.UNICODE_CHAR_NAME); 146 if (result == -1) { 147 result = getGroupChar(upperCaseName, 148 UCharacterNameChoice.CHAR_NAME_ALIAS); 149 } 150 } 151 else { 152 result = getGroupChar(upperCaseName, choice); 153 } 154 return result; 155 } 156 157 // these are all UCharacterNameIterator use methods ------------------- 158 159 /** 160 * Reads a block of compressed lengths of 32 strings and expands them into 161 * offsets and lengths for each string. Lengths are stored with a 162 * variable-width encoding in consecutive nibbles: 163 * If a nibble<0xc, then it is the length itself (0 = empty string). 164 * If a nibble>=0xc, then it forms a length value with the following 165 * nibble. 166 * The offsets and lengths arrays must be at least 33 (one more) long 167 * because there is no check here at the end if the last nibble is still 168 * used. 169 * @param index of group string object in array 170 * @param offsets array to store the value of the string offsets 171 * @param lengths array to store the value of the string length 172 * @return next index of the data string immediately after the lengths 173 * in terms of byte address 174 */ getGroupLengths(int index, char offsets[], char lengths[])175 public int getGroupLengths(int index, char offsets[], char lengths[]) 176 { 177 char length = 0xffff; 178 byte b = 0, 179 n = 0; 180 int shift; 181 index = index * m_groupsize_; // byte count offsets of group strings 182 int stringoffset = UCharacterUtility.toInt( 183 m_groupinfo_[index + OFFSET_HIGH_OFFSET_], 184 m_groupinfo_[index + OFFSET_LOW_OFFSET_]); 185 186 offsets[0] = 0; 187 188 // all 32 lengths must be read to get the offset of the first group 189 // string 190 for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) { 191 b = m_groupstring_[stringoffset]; 192 shift = 4; 193 194 while (shift >= 0) { 195 // getting nibble 196 n = (byte)((b >> shift) & 0x0F); 197 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) { 198 length = (char)((n - 12) << 4); 199 } 200 else { 201 if (length != 0xffff) { 202 lengths[i] = (char)((length | n) + 12); 203 } 204 else { 205 lengths[i] = (char)n; 206 } 207 208 if (i < LINES_PER_GROUP_) { 209 offsets[i + 1] = (char)(offsets[i] + lengths[i]); 210 } 211 212 length = 0xffff; 213 i ++; 214 } 215 216 shift -= 4; 217 } 218 } 219 return stringoffset; 220 } 221 222 /** 223 * Gets the name of the argument group index. 224 * UnicodeData.txt uses ';' as a field separator, so no field can contain 225 * ';' as part of its contents. In unames.icu, it is marked as 226 * token[';'] == -1 only if the semicolon is used in the data file - which 227 * is iff we have Unicode 1.0 names or ISO comments or aliases. 228 * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases 229 * although we know that it will never be part of a name. 230 * Equivalent to ICU4C's expandName. 231 * @param index of the group name string in byte count 232 * @param length of the group name string 233 * @param choice of Unicode 1.0 name or the most current name 234 * @return name of the group 235 */ getGroupName(int index, int length, int choice)236 public String getGroupName(int index, int length, int choice) 237 { 238 if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && 239 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME 240 ) { 241 if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) { 242 /* 243 * skip the modern name if it is not requested _and_ 244 * if the semicolon byte value is a character, not a token number 245 */ 246 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; 247 do { 248 int oldindex = index; 249 index += UCharacterUtility.skipByteSubString(m_groupstring_, 250 index, length, (byte)';'); 251 length -= (index - oldindex); 252 } while(--fieldIndex>0); 253 } 254 else { 255 // the semicolon byte is a token number, therefore only modern 256 // names are stored in unames.dat and there is no such 257 // requested alternate name here 258 length = 0; 259 } 260 } 261 262 synchronized (m_utilStringBuffer_) { 263 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 264 byte b; 265 char token; 266 for (int i = 0; i < length;) { 267 b = m_groupstring_[index + i]; 268 i ++; 269 270 if (b >= m_tokentable_.length) { 271 if (b == ';') { 272 break; 273 } 274 m_utilStringBuffer_.append(b); // implicit letter 275 } 276 else { 277 token = m_tokentable_[b & 0x00ff]; 278 if (token == 0xFFFE) { 279 // this is a lead byte for a double-byte token 280 token = m_tokentable_[b << 8 | 281 (m_groupstring_[index + i] & 0x00ff)]; 282 i ++; 283 } 284 if (token == 0xFFFF) { 285 if (b == ';') { 286 // skip the semicolon if we are seeking extended 287 // names and there was no 2.0 name but there 288 // is a 1.0 name. 289 if (m_utilStringBuffer_.length() == 0 && choice == 290 UCharacterNameChoice.EXTENDED_CHAR_NAME) { 291 continue; 292 } 293 break; 294 } 295 // explicit letter 296 m_utilStringBuffer_.append((char)(b & 0x00ff)); 297 } 298 else { // write token word 299 UCharacterUtility.getNullTermByteSubString( 300 m_utilStringBuffer_, m_tokenstring_, token); 301 } 302 } 303 } 304 305 if (m_utilStringBuffer_.length() > 0) { 306 return m_utilStringBuffer_.toString(); 307 } 308 } 309 return null; 310 } 311 312 /** 313 * Retrieves the extended name 314 */ getExtendedName(int ch)315 public String getExtendedName(int ch) 316 { 317 String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 318 if (result == null) { 319 // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F. 320 result = getExtendedOr10Name(ch); 321 } 322 return result; 323 } 324 325 /** 326 * Gets the group index for the codepoint, or the group before it. 327 * @param codepoint The codepoint index. 328 * @return group index containing codepoint or the group before it. 329 */ getGroup(int codepoint)330 public int getGroup(int codepoint) 331 { 332 int endGroup = m_groupcount_; 333 int msb = getCodepointMSB(codepoint); 334 int result = 0; 335 // binary search for the group of names that contains the one for 336 // code 337 // find the group that contains codepoint, or the highest before it 338 while (result < endGroup - 1) { 339 int gindex = (result + endGroup) >> 1; 340 if (msb < getGroupMSB(gindex)) { 341 endGroup = gindex; 342 } 343 else { 344 result = gindex; 345 } 346 } 347 return result; 348 } 349 350 /** 351 * Gets the extended and 1.0 name when the most current unicode names 352 * fail 353 * @param ch codepoint 354 * @return name of codepoint extended or 1.0 355 */ getExtendedOr10Name(int ch)356 public String getExtendedOr10Name(int ch) 357 { 358 String result = null; 359 // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F. 360 if (result == null) { 361 int type = getType(ch); 362 // Return unknown if the table of names above is not up to 363 // date. 364 if (type >= TYPE_NAMES_.length) { 365 result = UNKNOWN_TYPE_NAME_; 366 } 367 else { 368 result = TYPE_NAMES_[type]; 369 } 370 synchronized (m_utilStringBuffer_) { 371 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 372 m_utilStringBuffer_.append('<'); 373 m_utilStringBuffer_.append(result); 374 m_utilStringBuffer_.append('-'); 375 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); 376 int zeros = 4 - chStr.length(); 377 while (zeros > 0) { 378 m_utilStringBuffer_.append('0'); 379 zeros --; 380 } 381 m_utilStringBuffer_.append(chStr); 382 m_utilStringBuffer_.append('>'); 383 result = m_utilStringBuffer_.toString(); 384 } 385 } 386 return result; 387 } 388 389 /** 390 * Gets the MSB from the group index 391 * @param gindex group index 392 * @return the MSB of the group if gindex is valid, -1 otherwise 393 */ getGroupMSB(int gindex)394 public int getGroupMSB(int gindex) 395 { 396 if (gindex >= m_groupcount_) { 397 return -1; 398 } 399 return m_groupinfo_[gindex * m_groupsize_]; 400 } 401 402 /** 403 * Gets the MSB of the codepoint 404 * @param codepoint The codepoint value. 405 * @return the MSB of the codepoint 406 */ getCodepointMSB(int codepoint)407 public static int getCodepointMSB(int codepoint) 408 { 409 return codepoint >> GROUP_SHIFT_; 410 } 411 412 /** 413 * Gets the maximum codepoint + 1 of the group 414 * @param msb most significant byte of the group 415 * @return limit codepoint of the group 416 */ getGroupLimit(int msb)417 public static int getGroupLimit(int msb) 418 { 419 return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_; 420 } 421 422 /** 423 * Gets the minimum codepoint of the group 424 * @param msb most significant byte of the group 425 * @return minimum codepoint of the group 426 */ getGroupMin(int msb)427 public static int getGroupMin(int msb) 428 { 429 return msb << GROUP_SHIFT_; 430 } 431 432 /** 433 * Gets the offset to a group 434 * @param codepoint The codepoint value. 435 * @return offset to a group 436 */ getGroupOffset(int codepoint)437 public static int getGroupOffset(int codepoint) 438 { 439 return codepoint & GROUP_MASK_; 440 } 441 442 /** 443 * Gets the minimum codepoint of a group 444 * @param codepoint The codepoint value. 445 * @return minimum codepoint in the group which codepoint belongs to 446 */ 447 ///CLOVER:OFF getGroupMinFromCodepoint(int codepoint)448 public static int getGroupMinFromCodepoint(int codepoint) 449 { 450 return codepoint & ~GROUP_MASK_; 451 } 452 ///CLOVER:ON 453 454 /** 455 * Get the Algorithm range length 456 * @return Algorithm range length 457 */ getAlgorithmLength()458 public int getAlgorithmLength() 459 { 460 return m_algorithm_.length; 461 } 462 463 /** 464 * Gets the start of the range 465 * @param index algorithm index 466 * @return algorithm range start 467 */ getAlgorithmStart(int index)468 public int getAlgorithmStart(int index) 469 { 470 return m_algorithm_[index].m_rangestart_; 471 } 472 473 /** 474 * Gets the end of the range 475 * @param index algorithm index 476 * @return algorithm range end 477 */ getAlgorithmEnd(int index)478 public int getAlgorithmEnd(int index) 479 { 480 return m_algorithm_[index].m_rangeend_; 481 } 482 483 /** 484 * Gets the Algorithmic name of the codepoint 485 * @param index algorithmic range index 486 * @param codepoint The codepoint value. 487 * @return algorithmic name of codepoint 488 */ getAlgorithmName(int index, int codepoint)489 public String getAlgorithmName(int index, int codepoint) 490 { 491 String result = null; 492 synchronized (m_utilStringBuffer_) { 493 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 494 m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); 495 result = m_utilStringBuffer_.toString(); 496 } 497 return result; 498 } 499 500 /** 501 * Gets the group name of the character 502 * @param ch character to get the group name 503 * @param choice name choice selector to choose a unicode 1.0 or newer name 504 */ getGroupName(int ch, int choice)505 public synchronized String getGroupName(int ch, int choice) 506 { 507 // gets the msb 508 int msb = getCodepointMSB(ch); 509 int group = getGroup(ch); 510 511 // return this if it is an exact match 512 if (msb == m_groupinfo_[group * m_groupsize_]) { 513 int index = getGroupLengths(group, m_groupoffsets_, 514 m_grouplengths_); 515 int offset = ch & GROUP_MASK_; 516 return getGroupName(index + m_groupoffsets_[offset], 517 m_grouplengths_[offset], choice); 518 } 519 520 return null; 521 } 522 523 // these are transliterator use methods --------------------------------- 524 525 /** 526 * Gets the maximum length of any codepoint name. 527 * Equivalent to uprv_getMaxCharNameLength. 528 * @return the maximum length of any codepoint name 529 */ getMaxCharNameLength()530 public int getMaxCharNameLength() 531 { 532 if (initNameSetsLengths()) { 533 return m_maxNameLength_; 534 } 535 else { 536 return 0; 537 } 538 } 539 540 /** 541 * Gets the maximum length of any iso comments. 542 * Equivalent to uprv_getMaxISOCommentLength. 543 * @return the maximum length of any codepoint name 544 */ 545 ///CLOVER:OFF getMaxISOCommentLength()546 public int getMaxISOCommentLength() 547 { 548 if (initNameSetsLengths()) { 549 return m_maxISOCommentLength_; 550 } 551 else { 552 return 0; 553 } 554 } 555 ///CLOVER:ON 556 557 /** 558 * Fills set with characters that are used in Unicode character names. 559 * Equivalent to uprv_getCharNameCharacters. 560 * @param set USet to receive characters. Existing contents are deleted. 561 */ getCharNameCharacters(UnicodeSet set)562 public void getCharNameCharacters(UnicodeSet set) 563 { 564 convert(m_nameSet_, set); 565 } 566 567 /** 568 * Fills set with characters that are used in Unicode character names. 569 * Equivalent to uprv_getISOCommentCharacters. 570 * @param set USet to receive characters. Existing contents are deleted. 571 */ 572 ///CLOVER:OFF getISOCommentCharacters(UnicodeSet set)573 public void getISOCommentCharacters(UnicodeSet set) 574 { 575 convert(m_ISOCommentSet_, set); 576 } 577 ///CLOVER:ON 578 579 // package private inner class -------------------------------------- 580 581 /** 582 * Algorithmic name class 583 */ 584 static final class AlgorithmName 585 { 586 // package private data members ---------------------------------- 587 588 /** 589 * Constant type value of the different AlgorithmName 590 */ 591 static final int TYPE_0_ = 0; 592 static final int TYPE_1_ = 1; 593 594 // package private constructors ---------------------------------- 595 596 /** 597 * Constructor 598 */ AlgorithmName()599 AlgorithmName() 600 { 601 } 602 603 // package private methods --------------------------------------- 604 605 /** 606 * Sets the information for accessing the algorithmic names 607 * @param rangestart starting code point that lies within this name group 608 * @param rangeend end code point that lies within this name group 609 * @param type algorithm type. There's 2 kinds of algorithmic type. First 610 * which uses code point as part of its name and the other uses 611 * variant postfix strings 612 * @param variant algorithmic variant 613 * @return true if values are valid 614 */ setInfo(int rangestart, int rangeend, byte type, byte variant)615 boolean setInfo(int rangestart, int rangeend, byte type, byte variant) 616 { 617 if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend 618 && rangeend <= UCharacter.MAX_VALUE && 619 (type == TYPE_0_ || type == TYPE_1_)) { 620 m_rangestart_ = rangestart; 621 m_rangeend_ = rangeend; 622 m_type_ = type; 623 m_variant_ = variant; 624 return true; 625 } 626 return false; 627 } 628 629 /** 630 * Sets the factor data 631 * @param factor Array of factor 632 * @return true if factors are valid 633 */ setFactor(char factor[])634 boolean setFactor(char factor[]) 635 { 636 if (factor.length == m_variant_) { 637 m_factor_ = factor; 638 return true; 639 } 640 return false; 641 } 642 643 /** 644 * Sets the name prefix 645 * @param prefix 646 * @return true if prefix is set 647 */ setPrefix(String prefix)648 boolean setPrefix(String prefix) 649 { 650 if (prefix != null && prefix.length() > 0) { 651 m_prefix_ = prefix; 652 return true; 653 } 654 return false; 655 } 656 657 /** 658 * Sets the variant factorized name data 659 * @param string variant factorized name data 660 * @return true if values are set 661 */ setFactorString(byte string[])662 boolean setFactorString(byte string[]) 663 { 664 // factor and variant string can be empty for things like 665 // hanggul code points 666 m_factorstring_ = string; 667 return true; 668 } 669 670 /** 671 * Checks if code point lies in Algorithm object at index 672 * @param ch code point 673 */ contains(int ch)674 boolean contains(int ch) 675 { 676 return m_rangestart_ <= ch && ch <= m_rangeend_; 677 } 678 679 /** 680 * Appends algorithm name of code point into StringBuffer. 681 * Note this method does not check for validity of code point in Algorithm, 682 * result is undefined if code point does not belong in Algorithm. 683 * @param ch code point 684 * @param str StringBuffer to append to 685 */ appendName(int ch, StringBuffer str)686 void appendName(int ch, StringBuffer str) 687 { 688 str.append(m_prefix_); 689 switch (m_type_) 690 { 691 case TYPE_0_: 692 // prefix followed by hex digits indicating variants 693 str.append(Utility.hex(ch,m_variant_)); 694 break; 695 case TYPE_1_: 696 // prefix followed by factorized-elements 697 int offset = ch - m_rangestart_; 698 int indexes[] = m_utilIntBuffer_; 699 int factor; 700 701 // write elements according to the factors 702 // the factorized elements are determined by modulo 703 // arithmetic 704 synchronized (m_utilIntBuffer_) { 705 for (int i = m_variant_ - 1; i > 0; i --) 706 { 707 factor = m_factor_[i] & 0x00FF; 708 indexes[i] = offset % factor; 709 offset /= factor; 710 } 711 712 // we don't need to calculate the last modulus because 713 // start <= code <= end guarantees here that 714 // code <= factors[0] 715 indexes[0] = offset; 716 717 // joining up the factorized strings 718 str.append(getFactorString(indexes, m_variant_)); 719 } 720 break; 721 } 722 } 723 724 /** 725 * Gets the character for the argument algorithmic name 726 * @return the algorithmic char or -1 otherwise. 727 */ getChar(String name)728 int getChar(String name) 729 { 730 int prefixlen = m_prefix_.length(); 731 if (name.length() < prefixlen || 732 !m_prefix_.equals(name.substring(0, prefixlen))) { 733 return -1; 734 } 735 736 switch (m_type_) 737 { 738 case TYPE_0_ : 739 try 740 { 741 int result = Integer.parseInt(name.substring(prefixlen), 742 16); 743 // does it fit into the range? 744 if (m_rangestart_ <= result && result <= m_rangeend_) { 745 return result; 746 } 747 } 748 catch (NumberFormatException e) 749 { 750 return -1; 751 } 752 break; 753 case TYPE_1_ : 754 // repetitative suffix name comparison done here 755 // offset is the character code - start 756 for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++) 757 { 758 int offset = ch - m_rangestart_; 759 int indexes[] = m_utilIntBuffer_; 760 int factor; 761 762 // write elements according to the factors 763 // the factorized elements are determined by modulo 764 // arithmetic 765 synchronized (m_utilIntBuffer_) { 766 for (int i = m_variant_ - 1; i > 0; i --) 767 { 768 factor = m_factor_[i] & 0x00FF; 769 indexes[i] = offset % factor; 770 offset /= factor; 771 } 772 773 // we don't need to calculate the last modulus 774 // because start <= code <= end guarantees here that 775 // code <= factors[0] 776 indexes[0] = offset; 777 778 // joining up the factorized strings 779 if (compareFactorString(indexes, m_variant_, name, 780 prefixlen)) { 781 return ch; 782 } 783 } 784 } 785 } 786 787 return -1; 788 } 789 790 /** 791 * Adds all chars in the set of algorithmic names into the set. 792 * Equivalent to part of calcAlgNameSetsLengths. 793 * @param set int set to add the chars of the algorithm names into 794 * @param maxlength maximum length to compare to 795 * @return the length that is either maxlength of the length of this 796 * algorithm name if it is longer than maxlength 797 */ add(int set[], int maxlength)798 int add(int set[], int maxlength) 799 { 800 // prefix length 801 int length = UCharacterName.add(set, m_prefix_); 802 switch (m_type_) { 803 case TYPE_0_ : { 804 // name = prefix + (range->variant times) hex-digits 805 // prefix 806 length += m_variant_; 807 /* synwee to check 808 * addString(set, (const char *)(range + 1)) 809 + range->variant;*/ 810 break; 811 } 812 case TYPE_1_ : { 813 // name = prefix factorized-elements 814 // get the set and maximum factor suffix length for each 815 // factor 816 for (int i = m_variant_ - 1; i > 0; i --) 817 { 818 int maxfactorlength = 0; 819 int count = 0; 820 for (int factor = m_factor_[i]; factor > 0; -- factor) { 821 synchronized (m_utilStringBuffer_) { 822 m_utilStringBuffer_.delete(0, 823 m_utilStringBuffer_.length()); 824 count 825 = UCharacterUtility.getNullTermByteSubString( 826 m_utilStringBuffer_, 827 m_factorstring_, count); 828 UCharacterName.add(set, m_utilStringBuffer_); 829 if (m_utilStringBuffer_.length() 830 > maxfactorlength) 831 { 832 maxfactorlength 833 = m_utilStringBuffer_.length(); 834 } 835 } 836 } 837 length += maxfactorlength; 838 } 839 } 840 } 841 if (length > maxlength) { 842 return length; 843 } 844 return maxlength; 845 } 846 847 // private data members ------------------------------------------ 848 849 /** 850 * Algorithmic data information 851 */ 852 private int m_rangestart_; 853 private int m_rangeend_; 854 private byte m_type_; 855 private byte m_variant_; 856 private char m_factor_[]; 857 private String m_prefix_; 858 private byte m_factorstring_[]; 859 /** 860 * Utility StringBuffer 861 */ 862 private StringBuffer m_utilStringBuffer_ = new StringBuffer(); 863 /** 864 * Utility int buffer 865 */ 866 private int m_utilIntBuffer_[] = new int[256]; 867 868 // private methods ----------------------------------------------- 869 870 /** 871 * Gets the indexth string in each of the argument factor block 872 * @param index array with each index corresponding to each factor block 873 * @param length length of the array index 874 * @return the combined string of the array of indexth factor string in 875 * factor block 876 */ getFactorString(int index[], int length)877 private String getFactorString(int index[], int length) 878 { 879 int size = m_factor_.length; 880 if (index == null || length != size) { 881 return null; 882 } 883 884 synchronized (m_utilStringBuffer_) { 885 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 886 int count = 0; 887 int factor; 888 size --; 889 for (int i = 0; i <= size; i ++) { 890 factor = m_factor_[i]; 891 count = UCharacterUtility.skipNullTermByteSubString( 892 m_factorstring_, count, index[i]); 893 count = UCharacterUtility.getNullTermByteSubString( 894 m_utilStringBuffer_, m_factorstring_, 895 count); 896 if (i != size) { 897 count = UCharacterUtility.skipNullTermByteSubString( 898 m_factorstring_, count, 899 factor - index[i] - 1); 900 } 901 } 902 return m_utilStringBuffer_.toString(); 903 } 904 } 905 906 /** 907 * Compares the indexth string in each of the argument factor block with 908 * the argument string 909 * @param index array with each index corresponding to each factor block 910 * @param length index array length 911 * @param str string to compare with 912 * @param offset of str to start comparison 913 * @return true if string matches 914 */ compareFactorString(int index[], int length, String str, int offset)915 private boolean compareFactorString(int index[], int length, String str, 916 int offset) 917 { 918 int size = m_factor_.length; 919 if (index == null || length != size) 920 return false; 921 922 int count = 0; 923 int strcount = offset; 924 int factor; 925 size --; 926 for (int i = 0; i <= size; i ++) 927 { 928 factor = m_factor_[i]; 929 count = UCharacterUtility.skipNullTermByteSubString( 930 m_factorstring_, count, index[i]); 931 strcount = UCharacterUtility.compareNullTermByteSubString(str, 932 m_factorstring_, strcount, count); 933 if (strcount < 0) { 934 return false; 935 } 936 937 if (i != size) { 938 count = UCharacterUtility.skipNullTermByteSubString( 939 m_factorstring_, count, factor - index[i]); 940 } 941 } 942 if (strcount != str.length()) { 943 return false; 944 } 945 return true; 946 } 947 } 948 949 // package private data members -------------------------------------- 950 951 /** 952 * Size of each groups 953 */ 954 int m_groupsize_ = 0; 955 956 // package private methods -------------------------------------------- 957 958 /** 959 * Sets the token data 960 * @param token array of tokens 961 * @param tokenstring array of string values of the tokens 962 * @return false if there is a data error 963 */ setToken(char token[], byte tokenstring[])964 boolean setToken(char token[], byte tokenstring[]) 965 { 966 if (token != null && tokenstring != null && token.length > 0 && 967 tokenstring.length > 0) { 968 m_tokentable_ = token; 969 m_tokenstring_ = tokenstring; 970 return true; 971 } 972 return false; 973 } 974 975 /** 976 * Set the algorithm name information array 977 * @param alg Algorithm information array 978 * @return true if the group string offset has been set correctly 979 */ setAlgorithm(AlgorithmName alg[])980 boolean setAlgorithm(AlgorithmName alg[]) 981 { 982 if (alg != null && alg.length != 0) { 983 m_algorithm_ = alg; 984 return true; 985 } 986 return false; 987 } 988 989 /** 990 * Sets the number of group and size of each group in number of char 991 * @param count number of groups 992 * @param size size of group in char 993 * @return true if group size is set correctly 994 */ setGroupCountSize(int count, int size)995 boolean setGroupCountSize(int count, int size) 996 { 997 if (count <= 0 || size <= 0) { 998 return false; 999 } 1000 m_groupcount_ = count; 1001 m_groupsize_ = size; 1002 return true; 1003 } 1004 1005 /** 1006 * Sets the group name data 1007 * @param group index information array 1008 * @param groupstring name information array 1009 * @return false if there is a data error 1010 */ setGroup(char group[], byte groupstring[])1011 boolean setGroup(char group[], byte groupstring[]) 1012 { 1013 if (group != null && groupstring != null && group.length > 0 && 1014 groupstring.length > 0) { 1015 m_groupinfo_ = group; 1016 m_groupstring_ = groupstring; 1017 return true; 1018 } 1019 return false; 1020 } 1021 1022 // private data members ---------------------------------------------- 1023 1024 /** 1025 * Data used in unames.icu 1026 */ 1027 private char m_tokentable_[]; 1028 private byte m_tokenstring_[]; 1029 private char m_groupinfo_[]; 1030 private byte m_groupstring_[]; 1031 private AlgorithmName m_algorithm_[]; 1032 1033 /** 1034 * Group use. Note - access must be synchronized. 1035 */ 1036 private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1]; 1037 private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1]; 1038 1039 /** 1040 * Default name of the name datafile 1041 */ 1042 private static final String FILE_NAME_ = "unames.icu"; 1043 /** 1044 * Shift count to retrieve group information 1045 */ 1046 private static final int GROUP_SHIFT_ = 5; 1047 /** 1048 * Mask to retrieve the offset for a particular character within a group 1049 */ 1050 private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1; 1051 1052 /** 1053 * Position of offsethigh in group information array 1054 */ 1055 private static final int OFFSET_HIGH_OFFSET_ = 1; 1056 1057 /** 1058 * Position of offsetlow in group information array 1059 */ 1060 private static final int OFFSET_LOW_OFFSET_ = 2; 1061 /** 1062 * Double nibble indicator, any nibble > this number has to be combined 1063 * with its following nibble 1064 */ 1065 private static final int SINGLE_NIBBLE_MAX_ = 11; 1066 1067 /* 1068 * Maximum length of character names (regular & 1.0). 1069 */ 1070 //private static int MAX_NAME_LENGTH_ = 0; 1071 /* 1072 * Maximum length of ISO comments. 1073 */ 1074 //private static int MAX_ISO_COMMENT_LENGTH_ = 0; 1075 1076 /** 1077 * Set of chars used in character names (regular & 1.0). 1078 * Chars are platform-dependent (can be EBCDIC). 1079 */ 1080 private int m_nameSet_[] = new int[8]; 1081 /** 1082 * Set of chars used in ISO comments. (regular & 1.0). 1083 * Chars are platform-dependent (can be EBCDIC). 1084 */ 1085 private int m_ISOCommentSet_[] = new int[8]; 1086 /** 1087 * Utility StringBuffer 1088 */ 1089 private StringBuffer m_utilStringBuffer_ = new StringBuffer(); 1090 /** 1091 * Utility int buffer 1092 */ 1093 private int m_utilIntBuffer_[] = new int[2]; 1094 /** 1095 * Maximum ISO comment length 1096 */ 1097 private int m_maxISOCommentLength_; 1098 /** 1099 * Maximum name length 1100 */ 1101 private int m_maxNameLength_; 1102 /** 1103 * Type names used for extended names 1104 */ 1105 private static final String TYPE_NAMES_[] = {"unassigned", 1106 "uppercase letter", 1107 "lowercase letter", 1108 "titlecase letter", 1109 "modifier letter", 1110 "other letter", 1111 "non spacing mark", 1112 "enclosing mark", 1113 "combining spacing mark", 1114 "decimal digit number", 1115 "letter number", 1116 "other number", 1117 "space separator", 1118 "line separator", 1119 "paragraph separator", 1120 "control", 1121 "format", 1122 "private use area", 1123 "surrogate", 1124 "dash punctuation", 1125 "start punctuation", 1126 "end punctuation", 1127 "connector punctuation", 1128 "other punctuation", 1129 "math symbol", 1130 "currency symbol", 1131 "modifier symbol", 1132 "other symbol", 1133 "initial punctuation", 1134 "final punctuation", 1135 "noncharacter", 1136 "lead surrogate", 1137 "trail surrogate"}; 1138 /** 1139 * Unknown type name 1140 */ 1141 private static final String UNKNOWN_TYPE_NAME_ = "unknown"; 1142 /** 1143 * Not a character type 1144 */ 1145 private static final int NON_CHARACTER_ 1146 = UCharacterCategory.CHAR_CATEGORY_COUNT; 1147 /** 1148 * Lead surrogate type 1149 */ 1150 private static final int LEAD_SURROGATE_ 1151 = UCharacterCategory.CHAR_CATEGORY_COUNT + 1; 1152 /** 1153 * Trail surrogate type 1154 */ 1155 private static final int TRAIL_SURROGATE_ 1156 = UCharacterCategory.CHAR_CATEGORY_COUNT + 2; 1157 /** 1158 * Extended category count 1159 */ 1160 static final int EXTENDED_CATEGORY_ 1161 = UCharacterCategory.CHAR_CATEGORY_COUNT + 3; 1162 1163 // private constructor ------------------------------------------------ 1164 1165 /** 1166 * <p>Protected constructor for use in UCharacter.</p> 1167 * @exception IOException thrown when data reading fails 1168 */ UCharacterName()1169 private UCharacterName() throws IOException 1170 { 1171 ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_); 1172 UCharacterNameReader reader = new UCharacterNameReader(b); 1173 reader.read(this); 1174 } 1175 1176 // private methods --------------------------------------------------- 1177 1178 /** 1179 * Gets the algorithmic name for the argument character 1180 * @param ch character to determine name for 1181 * @param choice name choice 1182 * @return the algorithmic name or null if not found 1183 */ getAlgName(int ch, int choice)1184 private String getAlgName(int ch, int choice) 1185 { 1186 /* Only the normative character name can be algorithmic. */ 1187 if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || 1188 choice == UCharacterNameChoice.EXTENDED_CHAR_NAME 1189 ) { 1190 // index in terms integer index 1191 synchronized (m_utilStringBuffer_) { 1192 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 1193 1194 for (int index = m_algorithm_.length - 1; index >= 0; index --) 1195 { 1196 if (m_algorithm_[index].contains(ch)) { 1197 m_algorithm_[index].appendName(ch, m_utilStringBuffer_); 1198 return m_utilStringBuffer_.toString(); 1199 } 1200 } 1201 } 1202 } 1203 return null; 1204 } 1205 1206 /** 1207 * Getting the character with the tokenized argument name 1208 * @param name of the character 1209 * @return character with the tokenized argument name or -1 if character 1210 * is not found 1211 */ getGroupChar(String name, int choice)1212 private synchronized int getGroupChar(String name, int choice) 1213 { 1214 for (int i = 0; i < m_groupcount_; i ++) { 1215 // populating the data set of grouptable 1216 1217 int startgpstrindex = getGroupLengths(i, m_groupoffsets_, 1218 m_grouplengths_); 1219 1220 // shift out to function 1221 int result = getGroupChar(startgpstrindex, m_grouplengths_, name, 1222 choice); 1223 if (result != -1) { 1224 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_) 1225 | result; 1226 } 1227 } 1228 return -1; 1229 } 1230 1231 /** 1232 * Compares and retrieve character if name is found within the argument 1233 * group 1234 * @param index index where the set of names reside in the group block 1235 * @param length list of lengths of the strings 1236 * @param name character name to search for 1237 * @param choice of either 1.0 or the most current unicode name 1238 * @return relative character in the group which matches name, otherwise if 1239 * not found, -1 will be returned 1240 */ getGroupChar(int index, char length[], String name, int choice)1241 private int getGroupChar(int index, char length[], String name, 1242 int choice) 1243 { 1244 byte b = 0; 1245 char token; 1246 int len; 1247 int namelen = name.length(); 1248 int nindex; 1249 int count; 1250 1251 for (int result = 0; result <= LINES_PER_GROUP_; result ++) { 1252 nindex = 0; 1253 len = length[result]; 1254 1255 if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && 1256 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME 1257 ) { 1258 /* 1259 * skip the modern name if it is not requested _and_ 1260 * if the semicolon byte value is a character, not a token number 1261 */ 1262 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; 1263 do { 1264 int oldindex = index; 1265 index += UCharacterUtility.skipByteSubString(m_groupstring_, 1266 index, len, (byte)';'); 1267 len -= (index - oldindex); 1268 } while(--fieldIndex>0); 1269 } 1270 1271 // number of tokens is > the length of the name 1272 // write each letter directly, and write a token word per token 1273 for (count = 0; count < len && nindex != -1 && nindex < namelen; 1274 ) { 1275 b = m_groupstring_[index + count]; 1276 count ++; 1277 1278 if (b >= m_tokentable_.length) { 1279 if (name.charAt(nindex ++) != (b & 0xFF)) { 1280 nindex = -1; 1281 } 1282 } 1283 else { 1284 token = m_tokentable_[b & 0xFF]; 1285 if (token == 0xFFFE) { 1286 // this is a lead byte for a double-byte token 1287 token = m_tokentable_[b << 8 | 1288 (m_groupstring_[index + count] & 0x00ff)]; 1289 count ++; 1290 } 1291 if (token == 0xFFFF) { 1292 if (name.charAt(nindex ++) != (b & 0xFF)) { 1293 nindex = -1; 1294 } 1295 } 1296 else { 1297 // compare token with name 1298 nindex = UCharacterUtility.compareNullTermByteSubString( 1299 name, m_tokenstring_, nindex, token); 1300 } 1301 } 1302 } 1303 1304 if (namelen == nindex && 1305 (count == len || m_groupstring_[index + count] == ';')) { 1306 return result; 1307 } 1308 1309 index += len; 1310 } 1311 return -1; 1312 } 1313 1314 /** 1315 * Gets the character extended type 1316 * @param ch character to be tested 1317 * @return extended type it is associated with 1318 */ getType(int ch)1319 private static int getType(int ch) 1320 { 1321 if (UCharacterUtility.isNonCharacter(ch)) { 1322 // not a character we return a invalid category count 1323 return NON_CHARACTER_; 1324 } 1325 int result = UCharacter.getType(ch); 1326 if (result == UCharacterCategory.SURROGATE) { 1327 if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { 1328 result = LEAD_SURROGATE_; 1329 } 1330 else { 1331 result = TRAIL_SURROGATE_; 1332 } 1333 } 1334 return result; 1335 } 1336 1337 /** 1338 * Getting the character with extended name of the form <....>. 1339 * @param name of the character to be found 1340 * @param choice name choice 1341 * @return character associated with the name, -1 if such character is not 1342 * found and -2 if we should continue with the search. 1343 */ getExtendedChar(String name, int choice)1344 private static int getExtendedChar(String name, int choice) 1345 { 1346 if (name.charAt(0) == '<') { 1347 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 1348 int endIndex = name.length() - 1; 1349 if (name.charAt(endIndex) == '>') { 1350 int startIndex = name.lastIndexOf('-'); 1351 if (startIndex >= 0) { // We've got a category. 1352 startIndex ++; 1353 int result = -1; 1354 try { 1355 result = Integer.parseInt( 1356 name.substring(startIndex, endIndex), 1357 16); 1358 } 1359 catch (NumberFormatException e) { 1360 return -1; 1361 } 1362 // Now validate the category name. We could use a 1363 // binary search, or a trie, if we really wanted to. 1364 String type = name.substring(1, startIndex - 1); 1365 int length = TYPE_NAMES_.length; 1366 for (int i = 0; i < length; ++ i) { 1367 if (type.compareTo(TYPE_NAMES_[i]) == 0) { 1368 if (getType(result) == i) { 1369 return result; 1370 } 1371 break; 1372 } 1373 } 1374 } 1375 } 1376 } 1377 return -1; 1378 } 1379 return -2; 1380 } 1381 1382 // sets of name characters, maximum name lengths ----------------------- 1383 1384 /** 1385 * Adds a codepoint into a set of ints. 1386 * Equivalent to SET_ADD. 1387 * @param set set to add to 1388 * @param ch 16 bit char to add 1389 */ add(int set[], char ch)1390 private static void add(int set[], char ch) 1391 { 1392 set[ch >>> 5] |= 1 << (ch & 0x1f); 1393 } 1394 1395 /** 1396 * Checks if a codepoint is a part of a set of ints. 1397 * Equivalent to SET_CONTAINS. 1398 * @param set set to check in 1399 * @param ch 16 bit char to check 1400 * @return true if codepoint is part of the set, false otherwise 1401 */ contains(int set[], char ch)1402 private static boolean contains(int set[], char ch) 1403 { 1404 return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0; 1405 } 1406 1407 /** 1408 * Adds all characters of the argument str and gets the length 1409 * Equivalent to calcStringSetLength. 1410 * @param set set to add all chars of str to 1411 * @param str string to add 1412 */ add(int set[], String str)1413 private static int add(int set[], String str) 1414 { 1415 int result = str.length(); 1416 1417 for (int i = result - 1; i >= 0; i --) { 1418 add(set, str.charAt(i)); 1419 } 1420 return result; 1421 } 1422 1423 /** 1424 * Adds all characters of the argument str and gets the length 1425 * Equivalent to calcStringSetLength. 1426 * @param set set to add all chars of str to 1427 * @param str string to add 1428 */ add(int set[], StringBuffer str)1429 private static int add(int set[], StringBuffer str) 1430 { 1431 int result = str.length(); 1432 1433 for (int i = result - 1; i >= 0; i --) { 1434 add(set, str.charAt(i)); 1435 } 1436 return result; 1437 } 1438 1439 /** 1440 * Adds all algorithmic names into the name set. 1441 * Equivalent to part of calcAlgNameSetsLengths. 1442 * @param maxlength length to compare to 1443 * @return the maximum length of any possible algorithmic name if it is > 1444 * maxlength, otherwise maxlength is returned. 1445 */ addAlgorithmName(int maxlength)1446 private int addAlgorithmName(int maxlength) 1447 { 1448 int result = 0; 1449 for (int i = m_algorithm_.length - 1; i >= 0; i --) { 1450 result = m_algorithm_[i].add(m_nameSet_, maxlength); 1451 if (result > maxlength) { 1452 maxlength = result; 1453 } 1454 } 1455 return maxlength; 1456 } 1457 1458 /** 1459 * Adds all extended names into the name set. 1460 * Equivalent to part of calcExtNameSetsLengths. 1461 * @param maxlength length to compare to 1462 * @return the maxlength of any possible extended name. 1463 */ addExtendedName(int maxlength)1464 private int addExtendedName(int maxlength) 1465 { 1466 for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) { 1467 // for each category, count the length of the category name 1468 // plus 9 = 1469 // 2 for <> 1470 // 1 for - 1471 // 6 for most hex digits per code point 1472 int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]); 1473 if (length > maxlength) { 1474 maxlength = length; 1475 } 1476 } 1477 return maxlength; 1478 } 1479 1480 /** 1481 * Adds names of a group to the argument set. 1482 * Equivalent to calcNameSetLength. 1483 * @param offset of the group name string in byte count 1484 * @param length of the group name string 1485 * @param tokenlength array to store the length of each token 1486 * @param set to add to 1487 * @return the length of the name string and the length of the group 1488 * string parsed 1489 */ addGroupName(int offset, int length, byte tokenlength[], int set[])1490 private int[] addGroupName(int offset, int length, byte tokenlength[], 1491 int set[]) 1492 { 1493 int resultnlength = 0; 1494 int resultplength = 0; 1495 while (resultplength < length) { 1496 char b = (char)(m_groupstring_[offset + resultplength] & 0xff); 1497 resultplength ++; 1498 if (b == ';') { 1499 break; 1500 } 1501 1502 if (b >= m_tokentable_.length) { 1503 add(set, b); // implicit letter 1504 resultnlength ++; 1505 } 1506 else { 1507 char token = m_tokentable_[b & 0x00ff]; 1508 if (token == 0xFFFE) { 1509 // this is a lead byte for a double-byte token 1510 b = (char)(b << 8 | (m_groupstring_[offset + resultplength] 1511 & 0x00ff)); 1512 token = m_tokentable_[b]; 1513 resultplength ++; 1514 } 1515 if (token == 0xFFFF) { 1516 add(set, b); 1517 resultnlength ++; 1518 } 1519 else { 1520 // count token word 1521 // use cached token length 1522 byte tlength = tokenlength[b]; 1523 if (tlength == 0) { 1524 synchronized (m_utilStringBuffer_) { 1525 m_utilStringBuffer_.delete(0, 1526 m_utilStringBuffer_.length()); 1527 UCharacterUtility.getNullTermByteSubString( 1528 m_utilStringBuffer_, m_tokenstring_, 1529 token); 1530 tlength = (byte)add(set, m_utilStringBuffer_); 1531 } 1532 tokenlength[b] = tlength; 1533 } 1534 resultnlength += tlength; 1535 } 1536 } 1537 } 1538 m_utilIntBuffer_[0] = resultnlength; 1539 m_utilIntBuffer_[1] = resultplength; 1540 return m_utilIntBuffer_; 1541 } 1542 1543 /** 1544 * Adds names of all group to the argument set. 1545 * Sets the data member m_max*Length_. 1546 * Method called only once. 1547 * Equivalent to calcGroupNameSetsLength. 1548 * @param maxlength length to compare to 1549 */ addGroupName(int maxlength)1550 private void addGroupName(int maxlength) 1551 { 1552 int maxisolength = 0; 1553 char offsets[] = new char[LINES_PER_GROUP_ + 2]; 1554 char lengths[] = new char[LINES_PER_GROUP_ + 2]; 1555 byte tokenlengths[] = new byte[m_tokentable_.length]; 1556 1557 // enumerate all groups 1558 // for (int i = m_groupcount_ - 1; i >= 0; i --) { 1559 for (int i = 0; i < m_groupcount_ ; i ++) { 1560 int offset = getGroupLengths(i, offsets, lengths); 1561 // enumerate all lines in each group 1562 // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0; 1563 // linenumber --) { 1564 for (int linenumber = 0; linenumber < LINES_PER_GROUP_; 1565 linenumber ++) { 1566 int lineoffset = offset + offsets[linenumber]; 1567 int length = lengths[linenumber]; 1568 if (length == 0) { 1569 continue; 1570 } 1571 1572 // read regular name 1573 int parsed[] = addGroupName(lineoffset, length, tokenlengths, 1574 m_nameSet_); 1575 if (parsed[0] > maxlength) { 1576 // 0 for name length 1577 maxlength = parsed[0]; 1578 } 1579 lineoffset += parsed[1]; 1580 if (parsed[1] >= length) { 1581 // 1 for parsed group string length 1582 continue; 1583 } 1584 length -= parsed[1]; 1585 // read Unicode 1.0 name 1586 parsed = addGroupName(lineoffset, length, tokenlengths, 1587 m_nameSet_); 1588 if (parsed[0] > maxlength) { 1589 // 0 for name length 1590 maxlength = parsed[0]; 1591 } 1592 lineoffset += parsed[1]; 1593 if (parsed[1] >= length) { 1594 // 1 for parsed group string length 1595 continue; 1596 } 1597 length -= parsed[1]; 1598 // read ISO comment 1599 parsed = addGroupName(lineoffset, length, tokenlengths, 1600 m_ISOCommentSet_); 1601 if (parsed[1] > maxisolength) { 1602 maxisolength = length; 1603 } 1604 } 1605 } 1606 1607 // set gMax... - name length last for threading 1608 m_maxISOCommentLength_ = maxisolength; 1609 m_maxNameLength_ = maxlength; 1610 } 1611 1612 /** 1613 * Sets up the name sets and the calculation of the maximum lengths. 1614 * Equivalent to calcNameSetsLengths. 1615 */ initNameSetsLengths()1616 private boolean initNameSetsLengths() 1617 { 1618 if (m_maxNameLength_ > 0) { 1619 return true; 1620 } 1621 1622 String extra = "0123456789ABCDEF<>-"; 1623 // set hex digits, used in various names, and <>-, used in extended 1624 // names 1625 for (int i = extra.length() - 1; i >= 0; i --) { 1626 add(m_nameSet_, extra.charAt(i)); 1627 } 1628 1629 // set sets and lengths from algorithmic names 1630 m_maxNameLength_ = addAlgorithmName(0); 1631 // set sets and lengths from extended names 1632 m_maxNameLength_ = addExtendedName(m_maxNameLength_); 1633 // set sets and lengths from group names, set global maximum values 1634 addGroupName(m_maxNameLength_); 1635 return true; 1636 } 1637 1638 /** 1639 * Converts the char set cset into a Unicode set uset. 1640 * Equivalent to charSetToUSet. 1641 * @param set Set of 256 bit flags corresponding to a set of chars. 1642 * @param uset USet to receive characters. Existing contents are deleted. 1643 */ convert(int set[], UnicodeSet uset)1644 private void convert(int set[], UnicodeSet uset) 1645 { 1646 uset.clear(); 1647 if (!initNameSetsLengths()) { 1648 return; 1649 } 1650 1651 // build a char string with all chars that are used in character names 1652 for (char c = 255; c > 0; c --) { 1653 if (contains(set, c)) { 1654 uset.add(c); 1655 } 1656 } 1657 } 1658 } 1659