• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 /*
3  *******************************************************************************
4  * Copyright (C) 1996-2014, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  *******************************************************************************
7  */
8 
9 package android.icu.impl;
10 
11 import java.io.IOException;
12 import java.nio.ByteBuffer;
13 import java.util.Locale;
14 import java.util.MissingResourceException;
15 
16 import android.icu.lang.UCharacter;
17 import android.icu.lang.UCharacterCategory;
18 import android.icu.text.UTF16;
19 import android.icu.text.UnicodeSet;
20 
/**
* Internal class to manage character names.
* Since the data for names is stored
* in an array of char, by default the indexes used in this class refer to
* a 2 byte count, unless otherwise stated. In cases where the index refers
* to a byte count, the index is halved and, depending on whether the index is
* even or odd, the MSB or LSB of the result char at the halved index is
* returned. For indexes to an array of int, the index is multiplied by 2, and
* the result char at the multiplied index and its following char are returned
* as an int.
* <a href="../lang/UCharacter.html">UCharacter</a> acts as a public facade for this class.
* Note : 0 - 0x1F are control characters without names in Unicode 3.0
* @author Syn Wee Quek
* @hide Only a subset of ICU is exposed in Android
*/
36 
37 public final class UCharacterName
38 {
39     // public data members ----------------------------------------------
40 
41     /*
42      * public singleton instance
43      */
44     public static final UCharacterName INSTANCE;
45 
46     static {
47         try {
48             INSTANCE = new UCharacterName();
49         } catch (IOException e) {
50             ///CLOVER:OFF
51             throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","","");
52             ///CLOVER:ON
53         }
54     }
55 
56     /**
57     * Number of lines per group
58     * 1 << GROUP_SHIFT_
59     */
60     public static final int LINES_PER_GROUP_ = 1 << 5;
61     /**
62      * Maximum number of groups
63      */
64     public int m_groupcount_ = 0;
65 
66     // public methods ---------------------------------------------------
67 
68     /**
69     * Retrieve the name of a Unicode code point.
70     * Depending on <code>choice</code>, the character name written into the
71     * buffer is the "modern" name or the name that was defined in Unicode
72     * version 1.0.
73     * The name contains only "invariant" characters
74     * like A-Z, 0-9, space, and '-'.
75     *
76     * @param ch the code point for which to get the name.
77     * @param choice Selector for which name to get.
78     * @return if code point is above 0x1fff, null is returned
79     */
getName(int ch, int choice)80     public String getName(int ch, int choice)
81     {
82         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
83             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
84             return null;
85         }
86 
87         String result = null;
88 
89         result = getAlgName(ch, choice);
90 
91         // getting normal character name
92         if (result == null || result.length() == 0) {
93             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
94                 result = getExtendedName(ch);
95             } else {
96                 result = getGroupName(ch, choice);
97             }
98         }
99 
100         return result;
101     }
102 
103     /**
104     * Find a character by its name and return its code point value
105     * @param choice selector to indicate if argument name is a Unicode 1.0
106     *        or the most current version
107     * @param name the name to search for
108     * @return code point
109     */
getCharFromName(int choice, String name)110     public int getCharFromName(int choice, String name)
111     {
112         // checks for illegal arguments
113         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
114             name == null || name.length() == 0) {
115             return -1;
116         }
117 
118         // try extended names first
119         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
120         if (result >= -1) {
121             return result;
122         }
123 
124         String upperCaseName = name.toUpperCase(Locale.ENGLISH);
125         // try algorithmic names first, if fails then try group names
126         // int result = getAlgorithmChar(choice, uppercasename);
127 
128         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
129             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
130         ) {
131             int count = 0;
132             if (m_algorithm_ != null) {
133                 count = m_algorithm_.length;
134             }
135             for (count --; count >= 0; count --) {
136                 result = m_algorithm_[count].getChar(upperCaseName);
137                 if (result >= 0) {
138                     return result;
139                 }
140             }
141         }
142 
143         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
144             result = getGroupChar(upperCaseName,
145                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
146             if (result == -1) {
147                 result = getGroupChar(upperCaseName,
148                                       UCharacterNameChoice.CHAR_NAME_ALIAS);
149             }
150         }
151         else {
152             result = getGroupChar(upperCaseName, choice);
153         }
154         return result;
155     }
156 
157     // these are all UCharacterNameIterator use methods -------------------
158 
159     /**
160     * Reads a block of compressed lengths of 32 strings and expands them into
161     * offsets and lengths for each string. Lengths are stored with a
162     * variable-width encoding in consecutive nibbles:
163     * If a nibble<0xc, then it is the length itself (0 = empty string).
164     * If a nibble>=0xc, then it forms a length value with the following
165     * nibble.
166     * The offsets and lengths arrays must be at least 33 (one more) long
167     * because there is no check here at the end if the last nibble is still
168     * used.
169     * @param index of group string object in array
170     * @param offsets array to store the value of the string offsets
171     * @param lengths array to store the value of the string length
172     * @return next index of the data string immediately after the lengths
173     *         in terms of byte address
174     */
getGroupLengths(int index, char offsets[], char lengths[])175     public int getGroupLengths(int index, char offsets[], char lengths[])
176     {
177         char length = 0xffff;
178         byte b = 0,
179             n = 0;
180         int shift;
181         index = index * m_groupsize_; // byte count offsets of group strings
182         int stringoffset = UCharacterUtility.toInt(
183                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
184                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
185 
186         offsets[0] = 0;
187 
188         // all 32 lengths must be read to get the offset of the first group
189         // string
190         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
191             b = m_groupstring_[stringoffset];
192             shift = 4;
193 
194             while (shift >= 0) {
195                 // getting nibble
196                 n = (byte)((b >> shift) & 0x0F);
197                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
198                     length = (char)((n - 12) << 4);
199                 }
200                 else {
201                     if (length != 0xffff) {
202                        lengths[i] = (char)((length | n) + 12);
203                     }
204                     else {
205                        lengths[i] = (char)n;
206                     }
207 
208                     if (i < LINES_PER_GROUP_) {
209                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
210                     }
211 
212                     length = 0xffff;
213                     i ++;
214                 }
215 
216                 shift -= 4;
217             }
218         }
219         return stringoffset;
220     }
221 
222     /**
223     * Gets the name of the argument group index.
224     * UnicodeData.txt uses ';' as a field separator, so no field can contain
225     * ';' as part of its contents. In unames.icu, it is marked as
226     * token[';'] == -1 only if the semicolon is used in the data file - which
227     * is iff we have Unicode 1.0 names or ISO comments or aliases.
228     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
229     * although we know that it will never be part of a name.
230     * Equivalent to ICU4C's expandName.
231     * @param index of the group name string in byte count
232     * @param length of the group name string
233     * @param choice of Unicode 1.0 name or the most current name
234     * @return name of the group
235     */
getGroupName(int index, int length, int choice)236     public String getGroupName(int index, int length, int choice)
237     {
238         if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
239             choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
240         ) {
241             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
242                 /*
243                  * skip the modern name if it is not requested _and_
244                  * if the semicolon byte value is a character, not a token number
245                  */
246                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
247                 do {
248                     int oldindex = index;
249                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
250                                                        index, length, (byte)';');
251                     length -= (index - oldindex);
252                 } while(--fieldIndex>0);
253             }
254             else {
255                 // the semicolon byte is a token number, therefore only modern
256                 // names are stored in unames.dat and there is no such
257                 // requested alternate name here
258                 length = 0;
259             }
260         }
261 
262         synchronized (m_utilStringBuffer_) {
263             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
264             byte b;
265             char token;
266             for (int i = 0; i < length;) {
267                 b = m_groupstring_[index + i];
268                 i ++;
269 
270                 if (b >= m_tokentable_.length) {
271                     if (b == ';') {
272                         break;
273                     }
274                     m_utilStringBuffer_.append(b); // implicit letter
275                 }
276                 else {
277                     token = m_tokentable_[b & 0x00ff];
278                     if (token == 0xFFFE) {
279                         // this is a lead byte for a double-byte token
280                         token = m_tokentable_[b << 8 |
281                                           (m_groupstring_[index + i] & 0x00ff)];
282                         i ++;
283                     }
284                     if (token == 0xFFFF) {
285                         if (b == ';') {
286                             // skip the semicolon if we are seeking extended
287                             // names and there was no 2.0 name but there
288                             // is a 1.0 name.
289                             if (m_utilStringBuffer_.length() == 0 && choice ==
290                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
291                                 continue;
292                             }
293                             break;
294                         }
295                         // explicit letter
296                         m_utilStringBuffer_.append((char)(b & 0x00ff));
297                     }
298                     else { // write token word
299                         UCharacterUtility.getNullTermByteSubString(
300                                 m_utilStringBuffer_, m_tokenstring_, token);
301                     }
302                 }
303             }
304 
305             if (m_utilStringBuffer_.length() > 0) {
306                 return m_utilStringBuffer_.toString();
307             }
308         }
309         return null;
310     }
311 
312     /**
313     * Retrieves the extended name
314     */
getExtendedName(int ch)315     public String getExtendedName(int ch)
316     {
317         String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
318         if (result == null) {
319             // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
320             result = getExtendedOr10Name(ch);
321         }
322         return result;
323     }
324 
325     /**
326      * Gets the group index for the codepoint, or the group before it.
327      * @param codepoint The codepoint index.
328      * @return group index containing codepoint or the group before it.
329      */
getGroup(int codepoint)330     public int getGroup(int codepoint)
331     {
332         int endGroup = m_groupcount_;
333         int msb      = getCodepointMSB(codepoint);
334         int result   = 0;
335         // binary search for the group of names that contains the one for
336         // code
337         // find the group that contains codepoint, or the highest before it
338         while (result < endGroup - 1) {
339             int gindex = (result + endGroup) >> 1;
340             if (msb < getGroupMSB(gindex)) {
341                 endGroup = gindex;
342             }
343             else {
344                 result = gindex;
345             }
346         }
347         return result;
348     }
349 
350     /**
351      * Gets the extended and 1.0 name when the most current unicode names
352      * fail
353      * @param ch codepoint
354      * @return name of codepoint extended or 1.0
355      */
getExtendedOr10Name(int ch)356     public String getExtendedOr10Name(int ch)
357     {
358         String result = null;
359         // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
360         if (result == null) {
361             int type = getType(ch);
362             // Return unknown if the table of names above is not up to
363             // date.
364             if (type >= TYPE_NAMES_.length) {
365                 result = UNKNOWN_TYPE_NAME_;
366             }
367             else {
368                 result = TYPE_NAMES_[type];
369             }
370             synchronized (m_utilStringBuffer_) {
371                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
372                 m_utilStringBuffer_.append('<');
373                 m_utilStringBuffer_.append(result);
374                 m_utilStringBuffer_.append('-');
375                 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
376                 int zeros = 4 - chStr.length();
377                 while (zeros > 0) {
378                     m_utilStringBuffer_.append('0');
379                     zeros --;
380                 }
381                 m_utilStringBuffer_.append(chStr);
382                 m_utilStringBuffer_.append('>');
383                 result = m_utilStringBuffer_.toString();
384             }
385         }
386         return result;
387     }
388 
389     /**
390      * Gets the MSB from the group index
391      * @param gindex group index
392      * @return the MSB of the group if gindex is valid, -1 otherwise
393      */
getGroupMSB(int gindex)394     public int getGroupMSB(int gindex)
395     {
396         if (gindex >= m_groupcount_) {
397             return -1;
398         }
399         return m_groupinfo_[gindex * m_groupsize_];
400     }
401 
402     /**
403      * Gets the MSB of the codepoint
404      * @param codepoint The codepoint value.
405      * @return the MSB of the codepoint
406      */
getCodepointMSB(int codepoint)407     public static int getCodepointMSB(int codepoint)
408     {
409         return codepoint >> GROUP_SHIFT_;
410     }
411 
412     /**
413      * Gets the maximum codepoint + 1 of the group
414      * @param msb most significant byte of the group
415      * @return limit codepoint of the group
416      */
getGroupLimit(int msb)417     public static int getGroupLimit(int msb)
418     {
419         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
420     }
421 
422     /**
423      * Gets the minimum codepoint of the group
424      * @param msb most significant byte of the group
425      * @return minimum codepoint of the group
426      */
getGroupMin(int msb)427     public static int getGroupMin(int msb)
428     {
429         return msb << GROUP_SHIFT_;
430     }
431 
432     /**
433      * Gets the offset to a group
434      * @param codepoint The codepoint value.
435      * @return offset to a group
436      */
getGroupOffset(int codepoint)437     public static int getGroupOffset(int codepoint)
438     {
439         return codepoint & GROUP_MASK_;
440     }
441 
442     /**
443      * Gets the minimum codepoint of a group
444      * @param codepoint The codepoint value.
445      * @return minimum codepoint in the group which codepoint belongs to
446      */
447     ///CLOVER:OFF
getGroupMinFromCodepoint(int codepoint)448     public static int getGroupMinFromCodepoint(int codepoint)
449     {
450         return codepoint & ~GROUP_MASK_;
451     }
452     ///CLOVER:ON
453 
454     /**
455      * Get the Algorithm range length
456      * @return Algorithm range length
457      */
getAlgorithmLength()458     public int getAlgorithmLength()
459     {
460         return m_algorithm_.length;
461     }
462 
463     /**
464      * Gets the start of the range
465      * @param index algorithm index
466      * @return algorithm range start
467      */
getAlgorithmStart(int index)468     public int getAlgorithmStart(int index)
469     {
470         return m_algorithm_[index].m_rangestart_;
471     }
472 
473     /**
474      * Gets the end of the range
475      * @param index algorithm index
476      * @return algorithm range end
477      */
getAlgorithmEnd(int index)478     public int getAlgorithmEnd(int index)
479     {
480         return m_algorithm_[index].m_rangeend_;
481     }
482 
483     /**
484      * Gets the Algorithmic name of the codepoint
485      * @param index algorithmic range index
486      * @param codepoint The codepoint value.
487      * @return algorithmic name of codepoint
488      */
getAlgorithmName(int index, int codepoint)489     public String getAlgorithmName(int index, int codepoint)
490     {
491         String result = null;
492         synchronized (m_utilStringBuffer_) {
493             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
494             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
495             result = m_utilStringBuffer_.toString();
496         }
497         return result;
498     }
499 
500     /**
501     * Gets the group name of the character
502     * @param ch character to get the group name
503     * @param choice name choice selector to choose a unicode 1.0 or newer name
504     */
getGroupName(int ch, int choice)505     public synchronized String getGroupName(int ch, int choice)
506     {
507         // gets the msb
508         int msb   = getCodepointMSB(ch);
509         int group = getGroup(ch);
510 
511         // return this if it is an exact match
512         if (msb == m_groupinfo_[group * m_groupsize_]) {
513             int index = getGroupLengths(group, m_groupoffsets_,
514                                         m_grouplengths_);
515             int offset = ch & GROUP_MASK_;
516             return getGroupName(index + m_groupoffsets_[offset],
517                                 m_grouplengths_[offset], choice);
518         }
519 
520         return null;
521     }
522 
523     // these are transliterator use methods ---------------------------------
524 
525     /**
526      * Gets the maximum length of any codepoint name.
527      * Equivalent to uprv_getMaxCharNameLength.
528      * @return the maximum length of any codepoint name
529      */
getMaxCharNameLength()530     public int getMaxCharNameLength()
531     {
532         if (initNameSetsLengths()) {
533             return m_maxNameLength_;
534         }
535         else {
536             return 0;
537         }
538     }
539 
540     /**
541      * Gets the maximum length of any iso comments.
542      * Equivalent to uprv_getMaxISOCommentLength.
543      * @return the maximum length of any codepoint name
544      */
545     ///CLOVER:OFF
getMaxISOCommentLength()546     public int getMaxISOCommentLength()
547     {
548         if (initNameSetsLengths()) {
549             return m_maxISOCommentLength_;
550         }
551         else {
552             return 0;
553         }
554     }
555     ///CLOVER:ON
556 
557     /**
558      * Fills set with characters that are used in Unicode character names.
559      * Equivalent to uprv_getCharNameCharacters.
560      * @param set USet to receive characters. Existing contents are deleted.
561      */
getCharNameCharacters(UnicodeSet set)562     public void getCharNameCharacters(UnicodeSet set)
563     {
564         convert(m_nameSet_, set);
565     }
566 
567     /**
568      * Fills set with characters that are used in Unicode character names.
569      * Equivalent to uprv_getISOCommentCharacters.
570      * @param set USet to receive characters. Existing contents are deleted.
571      */
572     ///CLOVER:OFF
getISOCommentCharacters(UnicodeSet set)573     public void getISOCommentCharacters(UnicodeSet set)
574     {
575         convert(m_ISOCommentSet_, set);
576     }
577     ///CLOVER:ON
578 
579     // package private inner class --------------------------------------
580 
581     /**
582     * Algorithmic name class
583     */
584     static final class AlgorithmName
585     {
586         // package private data members ----------------------------------
587 
588         /**
589         * Constant type value of the different AlgorithmName
590         */
591         static final int TYPE_0_ = 0;
592         static final int TYPE_1_ = 1;
593 
594         // package private constructors ----------------------------------
595 
596         /**
597         * Constructor
598         */
AlgorithmName()599         AlgorithmName()
600         {
601         }
602 
603         // package private methods ---------------------------------------
604 
605         /**
606         * Sets the information for accessing the algorithmic names
607         * @param rangestart starting code point that lies within this name group
608         * @param rangeend end code point that lies within this name group
609         * @param type algorithm type. There's 2 kinds of algorithmic type. First
610         *        which uses code point as part of its name and the other uses
611         *        variant postfix strings
612         * @param variant algorithmic variant
613         * @return true if values are valid
614         */
setInfo(int rangestart, int rangeend, byte type, byte variant)615         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
616         {
617             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
618                 && rangeend <= UCharacter.MAX_VALUE &&
619                 (type == TYPE_0_ || type == TYPE_1_)) {
620                 m_rangestart_ = rangestart;
621                 m_rangeend_ = rangeend;
622                 m_type_ = type;
623                 m_variant_ = variant;
624                 return true;
625             }
626             return false;
627         }
628 
629         /**
630         * Sets the factor data
631         * @param factor Array of factor
632         * @return true if factors are valid
633         */
setFactor(char factor[])634         boolean setFactor(char factor[])
635         {
636             if (factor.length == m_variant_) {
637                 m_factor_ = factor;
638                 return true;
639             }
640             return false;
641         }
642 
643         /**
644         * Sets the name prefix
645         * @param prefix
646         * @return true if prefix is set
647         */
setPrefix(String prefix)648         boolean setPrefix(String prefix)
649         {
650             if (prefix != null && prefix.length() > 0) {
651                 m_prefix_ = prefix;
652                 return true;
653             }
654             return false;
655         }
656 
657         /**
658         * Sets the variant factorized name data
659         * @param string variant factorized name data
660         * @return true if values are set
661         */
setFactorString(byte string[])662         boolean setFactorString(byte string[])
663         {
664             // factor and variant string can be empty for things like
665             // hanggul code points
666             m_factorstring_ = string;
667             return true;
668         }
669 
670         /**
671         * Checks if code point lies in Algorithm object at index
672         * @param ch code point
673         */
contains(int ch)674         boolean contains(int ch)
675         {
676             return m_rangestart_ <= ch && ch <= m_rangeend_;
677         }
678 
679         /**
680         * Appends algorithm name of code point into StringBuffer.
681         * Note this method does not check for validity of code point in Algorithm,
682         * result is undefined if code point does not belong in Algorithm.
683         * @param ch code point
684         * @param str StringBuffer to append to
685         */
appendName(int ch, StringBuffer str)686         void appendName(int ch, StringBuffer str)
687         {
688             str.append(m_prefix_);
689             switch (m_type_)
690             {
691                 case TYPE_0_:
692                     // prefix followed by hex digits indicating variants
693                 str.append(Utility.hex(ch,m_variant_));
694                     break;
695                 case TYPE_1_:
696                     // prefix followed by factorized-elements
697                     int offset = ch - m_rangestart_;
698                     int indexes[] = m_utilIntBuffer_;
699                     int factor;
700 
701                     // write elements according to the factors
702                     // the factorized elements are determined by modulo
703                     // arithmetic
704                     synchronized (m_utilIntBuffer_) {
705                         for (int i = m_variant_ - 1; i > 0; i --)
706                         {
707                             factor = m_factor_[i] & 0x00FF;
708                             indexes[i] = offset % factor;
709                             offset /= factor;
710                         }
711 
712                         // we don't need to calculate the last modulus because
713                         // start <= code <= end guarantees here that
714                         // code <= factors[0]
715                         indexes[0] = offset;
716 
717                         // joining up the factorized strings
718                         str.append(getFactorString(indexes, m_variant_));
719                     }
720                     break;
721             }
722         }
723 
724         /**
725         * Gets the character for the argument algorithmic name
726         * @return the algorithmic char or -1 otherwise.
727         */
getChar(String name)728         int getChar(String name)
729         {
730             int prefixlen = m_prefix_.length();
731             if (name.length() < prefixlen ||
732                 !m_prefix_.equals(name.substring(0, prefixlen))) {
733                 return -1;
734             }
735 
736             switch (m_type_)
737             {
738                 case TYPE_0_ :
739                 try
740                 {
741                     int result = Integer.parseInt(name.substring(prefixlen),
742                                                   16);
743                     // does it fit into the range?
744                     if (m_rangestart_ <= result && result <= m_rangeend_) {
745                         return result;
746                     }
747                 }
748                 catch (NumberFormatException e)
749                 {
750                     return -1;
751                 }
752                 break;
753                 case TYPE_1_ :
754                     // repetitative suffix name comparison done here
755                     // offset is the character code - start
756                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
757                     {
758                         int offset = ch - m_rangestart_;
759                         int indexes[] = m_utilIntBuffer_;
760                         int factor;
761 
762                         // write elements according to the factors
763                         // the factorized elements are determined by modulo
764                         // arithmetic
765                         synchronized (m_utilIntBuffer_) {
766                             for (int i = m_variant_ - 1; i > 0; i --)
767                             {
768                                 factor = m_factor_[i] & 0x00FF;
769                                 indexes[i] = offset % factor;
770                                 offset /= factor;
771                             }
772 
773                             // we don't need to calculate the last modulus
774                             // because start <= code <= end guarantees here that
775                             // code <= factors[0]
776                             indexes[0] = offset;
777 
778                             // joining up the factorized strings
779                             if (compareFactorString(indexes, m_variant_, name,
780                                                     prefixlen)) {
781                                 return ch;
782                             }
783                         }
784                     }
785             }
786 
787             return -1;
788         }
789 
790         /**
791          * Adds all chars in the set of algorithmic names into the set.
792          * Equivalent to part of calcAlgNameSetsLengths.
793          * @param set int set to add the chars of the algorithm names into
794          * @param maxlength maximum length to compare to
795          * @return the length that is either maxlength of the length of this
796          *         algorithm name if it is longer than maxlength
797          */
add(int set[], int maxlength)798         int add(int set[], int maxlength)
799         {
800             // prefix length
801             int length = UCharacterName.add(set, m_prefix_);
802             switch (m_type_) {
803                 case TYPE_0_ : {
804                     // name = prefix + (range->variant times) hex-digits
805                     // prefix
806                     length += m_variant_;
807                     /* synwee to check
808                      * addString(set, (const char *)(range + 1))
809                                        + range->variant;*/
810                     break;
811                 }
812                 case TYPE_1_ : {
813                     // name = prefix factorized-elements
814                     // get the set and maximum factor suffix length for each
815                     // factor
816                     for (int i = m_variant_ - 1; i > 0; i --)
817                     {
818                         int maxfactorlength = 0;
819                         int count = 0;
820                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
821                             synchronized (m_utilStringBuffer_) {
822                                 m_utilStringBuffer_.delete(0,
823                                                 m_utilStringBuffer_.length());
824                                 count
825                                   = UCharacterUtility.getNullTermByteSubString(
826                                                 m_utilStringBuffer_,
827                                                 m_factorstring_, count);
828                                 UCharacterName.add(set, m_utilStringBuffer_);
829                                 if (m_utilStringBuffer_.length()
830                                                             > maxfactorlength)
831                                 {
832                                     maxfactorlength
833                                                 = m_utilStringBuffer_.length();
834                                 }
835                             }
836                         }
837                         length += maxfactorlength;
838                     }
839                 }
840             }
841             if (length > maxlength) {
842                 return length;
843             }
844             return maxlength;
845         }
846 
847         // private data members ------------------------------------------
848 
849         /**
850         * Algorithmic data information
851         */
852         private int m_rangestart_;
853         private int m_rangeend_;
854         private byte m_type_;
855         private byte m_variant_;
856         private char m_factor_[];
857         private String m_prefix_;
858         private byte m_factorstring_[];
859         /**
860          * Utility StringBuffer
861          */
862         private StringBuffer m_utilStringBuffer_ = new StringBuffer();
863         /**
864          * Utility int buffer
865          */
866         private int m_utilIntBuffer_[] = new int[256];
867 
868         // private methods -----------------------------------------------
869 
870         /**
871         * Gets the indexth string in each of the argument factor block
872         * @param index array with each index corresponding to each factor block
873         * @param length length of the array index
874         * @return the combined string of the array of indexth factor string in
875         *         factor block
876         */
getFactorString(int index[], int length)877         private String getFactorString(int index[], int length)
878         {
879             int size = m_factor_.length;
880             if (index == null || length != size) {
881                 return null;
882             }
883 
884             synchronized (m_utilStringBuffer_) {
885                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
886                 int count = 0;
887                 int factor;
888                 size --;
889                 for (int i = 0; i <= size; i ++) {
890                     factor = m_factor_[i];
891                     count = UCharacterUtility.skipNullTermByteSubString(
892                                              m_factorstring_, count, index[i]);
893                     count = UCharacterUtility.getNullTermByteSubString(
894                                           m_utilStringBuffer_, m_factorstring_,
895                                           count);
896                     if (i != size) {
897                         count = UCharacterUtility.skipNullTermByteSubString(
898                                                        m_factorstring_, count,
899                                                        factor - index[i] - 1);
900                     }
901                 }
902                 return m_utilStringBuffer_.toString();
903             }
904         }
905 
906         /**
907         * Compares the indexth string in each of the argument factor block with
908         * the argument string
909         * @param index array with each index corresponding to each factor block
910         * @param length index array length
911         * @param str string to compare with
912         * @param offset of str to start comparison
913         * @return true if string matches
914         */
compareFactorString(int index[], int length, String str, int offset)915         private boolean compareFactorString(int index[], int length, String str,
916                                             int offset)
917         {
918             int size = m_factor_.length;
919             if (index == null || length != size)
920                 return false;
921 
922             int count = 0;
923             int strcount = offset;
924             int factor;
925             size --;
926             for (int i = 0; i <= size; i ++)
927             {
928                 factor = m_factor_[i];
929                 count = UCharacterUtility.skipNullTermByteSubString(
930                                           m_factorstring_, count, index[i]);
931                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
932                                           m_factorstring_, strcount, count);
933                 if (strcount < 0) {
934                     return false;
935                 }
936 
937                 if (i != size) {
938                     count = UCharacterUtility.skipNullTermByteSubString(
939                                   m_factorstring_, count, factor - index[i]);
940                 }
941             }
942             if (strcount != str.length()) {
943                 return false;
944             }
945             return true;
946         }
947     }
948 
949     // package private data members --------------------------------------
950 
    /**
     * Size of each group, in number of chars
     */
954     int m_groupsize_ = 0;
955 
956     // package private methods --------------------------------------------
957 
958     /**
959     * Sets the token data
960     * @param token array of tokens
961     * @param tokenstring array of string values of the tokens
962     * @return false if there is a data error
963     */
setToken(char token[], byte tokenstring[])964     boolean setToken(char token[], byte tokenstring[])
965     {
966         if (token != null && tokenstring != null && token.length > 0 &&
967             tokenstring.length > 0) {
968             m_tokentable_ = token;
969             m_tokenstring_ = tokenstring;
970             return true;
971         }
972         return false;
973     }
974 
975     /**
976     * Set the algorithm name information array
977     * @param alg Algorithm information array
978     * @return true if the group string offset has been set correctly
979     */
setAlgorithm(AlgorithmName alg[])980     boolean setAlgorithm(AlgorithmName alg[])
981     {
982         if (alg != null && alg.length != 0) {
983             m_algorithm_ = alg;
984             return true;
985         }
986         return false;
987     }
988 
989     /**
990     * Sets the number of group and size of each group in number of char
991     * @param count number of groups
992     * @param size size of group in char
993     * @return true if group size is set correctly
994     */
setGroupCountSize(int count, int size)995     boolean setGroupCountSize(int count, int size)
996     {
997         if (count <= 0 || size <= 0) {
998             return false;
999         }
1000         m_groupcount_ = count;
1001         m_groupsize_ = size;
1002         return true;
1003     }
1004 
1005     /**
1006     * Sets the group name data
1007     * @param group index information array
1008     * @param groupstring name information array
1009     * @return false if there is a data error
1010     */
setGroup(char group[], byte groupstring[])1011     boolean setGroup(char group[], byte groupstring[])
1012     {
1013         if (group != null && groupstring != null && group.length > 0 &&
1014             groupstring.length > 0) {
1015             m_groupinfo_ = group;
1016             m_groupstring_ = groupstring;
1017             return true;
1018         }
1019         return false;
1020     }
1021 
1022     // private data members ----------------------------------------------
1023 
1024     /**
1025     * Data used in unames.icu
1026     */
1027     private char m_tokentable_[];
1028     private byte m_tokenstring_[];
1029     private char m_groupinfo_[];
1030     private byte m_groupstring_[];
1031     private AlgorithmName m_algorithm_[];
1032 
1033     /**
1034     * Group use.  Note - access must be synchronized.
1035     */
1036     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1037     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1038 
1039     /**
1040     * Default name of the name datafile
1041     */
1042     private static final String FILE_NAME_ = "unames.icu";
1043     /**
1044     * Shift count to retrieve group information
1045     */
1046     private static final int GROUP_SHIFT_ = 5;
1047     /**
1048     * Mask to retrieve the offset for a particular character within a group
1049     */
1050     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1051 
1052     /**
1053     * Position of offsethigh in group information array
1054     */
1055     private static final int OFFSET_HIGH_OFFSET_ = 1;
1056 
1057     /**
1058     * Position of offsetlow in group information array
1059     */
1060     private static final int OFFSET_LOW_OFFSET_ = 2;
1061     /**
1062     * Double nibble indicator, any nibble > this number has to be combined
1063     * with its following nibble
1064     */
1065     private static final int SINGLE_NIBBLE_MAX_ = 11;
1066 
1067     /*
1068      * Maximum length of character names (regular & 1.0).
1069      */
1070     //private static int MAX_NAME_LENGTH_ = 0;
1071     /*
1072      * Maximum length of ISO comments.
1073      */
1074     //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1075 
1076     /**
1077      * Set of chars used in character names (regular & 1.0).
1078      * Chars are platform-dependent (can be EBCDIC).
1079      */
1080     private int m_nameSet_[] = new int[8];
1081     /**
1082      * Set of chars used in ISO comments. (regular & 1.0).
1083      * Chars are platform-dependent (can be EBCDIC).
1084      */
1085     private int m_ISOCommentSet_[] = new int[8];
1086     /**
1087      * Utility StringBuffer
1088      */
1089     private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1090     /**
1091      * Utility int buffer
1092      */
1093     private int m_utilIntBuffer_[] = new int[2];
1094     /**
1095      * Maximum ISO comment length
1096      */
1097     private int m_maxISOCommentLength_;
1098     /**
1099      * Maximum name length
1100      */
1101     private int m_maxNameLength_;
1102     /**
1103      * Type names used for extended names
1104      */
1105     private static final String TYPE_NAMES_[] = {"unassigned",
1106                                                  "uppercase letter",
1107                                                  "lowercase letter",
1108                                                  "titlecase letter",
1109                                                  "modifier letter",
1110                                                  "other letter",
1111                                                  "non spacing mark",
1112                                                  "enclosing mark",
1113                                                  "combining spacing mark",
1114                                                  "decimal digit number",
1115                                                  "letter number",
1116                                                  "other number",
1117                                                  "space separator",
1118                                                  "line separator",
1119                                                  "paragraph separator",
1120                                                  "control",
1121                                                  "format",
1122                                                  "private use area",
1123                                                  "surrogate",
1124                                                  "dash punctuation",
1125                                                  "start punctuation",
1126                                                  "end punctuation",
1127                                                  "connector punctuation",
1128                                                  "other punctuation",
1129                                                  "math symbol",
1130                                                  "currency symbol",
1131                                                  "modifier symbol",
1132                                                  "other symbol",
1133                                                  "initial punctuation",
1134                                                  "final punctuation",
1135                                                  "noncharacter",
1136                                                  "lead surrogate",
1137                                                  "trail surrogate"};
1138     /**
1139      * Unknown type name
1140      */
1141     private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1142     /**
1143      * Not a character type
1144      */
1145     private static final int NON_CHARACTER_
1146                                     = UCharacterCategory.CHAR_CATEGORY_COUNT;
1147     /**
1148     * Lead surrogate type
1149     */
1150     private static final int LEAD_SURROGATE_
1151                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1152     /**
1153     * Trail surrogate type
1154     */
1155     private static final int TRAIL_SURROGATE_
1156                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1157     /**
1158     * Extended category count
1159     */
1160     static final int EXTENDED_CATEGORY_
1161                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1162 
1163     // private constructor ------------------------------------------------
1164 
1165     /**
1166     * <p>Protected constructor for use in UCharacter.</p>
1167     * @exception IOException thrown when data reading fails
1168     */
UCharacterName()1169     private UCharacterName() throws IOException
1170     {
1171         ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
1172         UCharacterNameReader reader = new UCharacterNameReader(b);
1173         reader.read(this);
1174     }
1175 
1176     // private methods ---------------------------------------------------
1177 
1178     /**
1179     * Gets the algorithmic name for the argument character
1180     * @param ch character to determine name for
1181     * @param choice name choice
1182     * @return the algorithmic name or null if not found
1183     */
getAlgName(int ch, int choice)1184     private String getAlgName(int ch, int choice)
1185     {
1186         /* Only the normative character name can be algorithmic. */
1187         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
1188             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
1189         ) {
1190             // index in terms integer index
1191             synchronized (m_utilStringBuffer_) {
1192                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
1193 
1194                 for (int index = m_algorithm_.length - 1; index >= 0; index --)
1195                 {
1196                    if (m_algorithm_[index].contains(ch)) {
1197                       m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1198                       return m_utilStringBuffer_.toString();
1199                    }
1200                 }
1201             }
1202         }
1203         return null;
1204     }
1205 
1206     /**
1207     * Getting the character with the tokenized argument name
1208     * @param name of the character
1209     * @return character with the tokenized argument name or -1 if character
1210     *         is not found
1211     */
getGroupChar(String name, int choice)1212     private synchronized int getGroupChar(String name, int choice)
1213     {
1214         for (int i = 0; i < m_groupcount_; i ++) {
1215             // populating the data set of grouptable
1216 
1217             int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1218                                                   m_grouplengths_);
1219 
1220             // shift out to function
1221             int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1222                                       choice);
1223             if (result != -1) {
1224                 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1225                          | result;
1226             }
1227         }
1228         return -1;
1229     }
1230 
1231     /**
1232     * Compares and retrieve character if name is found within the argument
1233     * group
1234     * @param index index where the set of names reside in the group block
1235     * @param length list of lengths of the strings
1236     * @param name character name to search for
1237     * @param choice of either 1.0 or the most current unicode name
1238     * @return relative character in the group which matches name, otherwise if
1239     *         not found, -1 will be returned
1240     */
getGroupChar(int index, char length[], String name, int choice)1241     private int getGroupChar(int index, char length[], String name,
1242                              int choice)
1243     {
1244         byte b = 0;
1245         char token;
1246         int len;
1247         int namelen = name.length();
1248         int nindex;
1249         int count;
1250 
1251         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1252             nindex = 0;
1253             len = length[result];
1254 
1255             if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
1256                 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
1257             ) {
1258                 /*
1259                  * skip the modern name if it is not requested _and_
1260                  * if the semicolon byte value is a character, not a token number
1261                  */
1262                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
1263                 do {
1264                     int oldindex = index;
1265                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
1266                                                          index, len, (byte)';');
1267                     len -= (index - oldindex);
1268                 } while(--fieldIndex>0);
1269             }
1270 
1271             // number of tokens is > the length of the name
1272             // write each letter directly, and write a token word per token
1273             for (count = 0; count < len && nindex != -1 && nindex < namelen;
1274                 ) {
1275                 b = m_groupstring_[index + count];
1276                 count ++;
1277 
1278                 if (b >= m_tokentable_.length) {
1279                     if (name.charAt(nindex ++) != (b & 0xFF)) {
1280                         nindex = -1;
1281                     }
1282                 }
1283                 else {
1284                     token = m_tokentable_[b & 0xFF];
1285                     if (token == 0xFFFE) {
1286                         // this is a lead byte for a double-byte token
1287                         token = m_tokentable_[b << 8 |
1288                                    (m_groupstring_[index + count] & 0x00ff)];
1289                         count ++;
1290                     }
1291                     if (token == 0xFFFF) {
1292                         if (name.charAt(nindex ++) != (b & 0xFF)) {
1293                             nindex = -1;
1294                         }
1295                     }
1296                     else {
1297                         // compare token with name
1298                         nindex = UCharacterUtility.compareNullTermByteSubString(
1299                                         name, m_tokenstring_, nindex, token);
1300                     }
1301                 }
1302             }
1303 
1304             if (namelen == nindex &&
1305                 (count == len || m_groupstring_[index + count] == ';')) {
1306                 return result;
1307             }
1308 
1309             index += len;
1310         }
1311         return -1;
1312     }
1313 
1314     /**
1315     * Gets the character extended type
1316     * @param ch character to be tested
1317     * @return extended type it is associated with
1318     */
getType(int ch)1319     private static int getType(int ch)
1320     {
1321         if (UCharacterUtility.isNonCharacter(ch)) {
1322             // not a character we return a invalid category count
1323             return NON_CHARACTER_;
1324         }
1325         int result = UCharacter.getType(ch);
1326         if (result == UCharacterCategory.SURROGATE) {
1327             if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1328                 result = LEAD_SURROGATE_;
1329             }
1330             else {
1331                 result = TRAIL_SURROGATE_;
1332             }
1333         }
1334         return result;
1335     }
1336 
1337     /**
1338     * Getting the character with extended name of the form <....>.
1339     * @param name of the character to be found
1340     * @param choice name choice
1341     * @return character associated with the name, -1 if such character is not
1342     *                   found and -2 if we should continue with the search.
1343     */
getExtendedChar(String name, int choice)1344     private static int getExtendedChar(String name, int choice)
1345     {
1346         if (name.charAt(0) == '<') {
1347             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1348                 int endIndex = name.length() - 1;
1349                 if (name.charAt(endIndex) == '>') {
1350                     int startIndex = name.lastIndexOf('-');
1351                     if (startIndex >= 0) { // We've got a category.
1352                         startIndex ++;
1353                         int result = -1;
1354                         try {
1355                             result = Integer.parseInt(
1356                                         name.substring(startIndex, endIndex),
1357                                         16);
1358                         }
1359                         catch (NumberFormatException e) {
1360                             return -1;
1361                         }
1362                         // Now validate the category name. We could use a
1363                         // binary search, or a trie, if we really wanted to.
1364                         String type = name.substring(1, startIndex - 1);
1365                         int length = TYPE_NAMES_.length;
1366                         for (int i = 0; i < length; ++ i) {
1367                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1368                                 if (getType(result) == i) {
1369                                     return result;
1370                                 }
1371                                 break;
1372                             }
1373                         }
1374                     }
1375                 }
1376             }
1377             return -1;
1378         }
1379         return -2;
1380     }
1381 
1382     // sets of name characters, maximum name lengths -----------------------
1383 
1384     /**
1385      * Adds a codepoint into a set of ints.
1386      * Equivalent to SET_ADD.
1387      * @param set set to add to
1388      * @param ch 16 bit char to add
1389      */
add(int set[], char ch)1390     private static void add(int set[], char ch)
1391     {
1392         set[ch >>> 5] |= 1 << (ch & 0x1f);
1393     }
1394 
1395     /**
1396      * Checks if a codepoint is a part of a set of ints.
1397      * Equivalent to SET_CONTAINS.
1398      * @param set set to check in
1399      * @param ch 16 bit char to check
1400      * @return true if codepoint is part of the set, false otherwise
1401      */
contains(int set[], char ch)1402     private static boolean contains(int set[], char ch)
1403     {
1404         return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1405     }
1406 
1407     /**
1408      * Adds all characters of the argument str and gets the length
1409      * Equivalent to calcStringSetLength.
1410      * @param set set to add all chars of str to
1411      * @param str string to add
1412      */
add(int set[], String str)1413     private static int add(int set[], String str)
1414     {
1415         int result = str.length();
1416 
1417         for (int i = result - 1; i >= 0; i --) {
1418             add(set, str.charAt(i));
1419         }
1420         return result;
1421     }
1422 
1423     /**
1424      * Adds all characters of the argument str and gets the length
1425      * Equivalent to calcStringSetLength.
1426      * @param set set to add all chars of str to
1427      * @param str string to add
1428      */
add(int set[], StringBuffer str)1429     private static int add(int set[], StringBuffer str)
1430     {
1431         int result = str.length();
1432 
1433         for (int i = result - 1; i >= 0; i --) {
1434             add(set, str.charAt(i));
1435         }
1436         return result;
1437     }
1438 
1439     /**
1440      * Adds all algorithmic names into the name set.
1441      * Equivalent to part of calcAlgNameSetsLengths.
1442      * @param maxlength length to compare to
1443      * @return the maximum length of any possible algorithmic name if it is >
1444      *         maxlength, otherwise maxlength is returned.
1445      */
addAlgorithmName(int maxlength)1446     private int addAlgorithmName(int maxlength)
1447     {
1448         int result = 0;
1449         for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1450             result = m_algorithm_[i].add(m_nameSet_, maxlength);
1451             if (result > maxlength) {
1452                 maxlength = result;
1453             }
1454         }
1455         return maxlength;
1456     }
1457 
1458     /**
1459      * Adds all extended names into the name set.
1460      * Equivalent to part of calcExtNameSetsLengths.
1461      * @param maxlength length to compare to
1462      * @return the maxlength of any possible extended name.
1463      */
addExtendedName(int maxlength)1464     private int addExtendedName(int maxlength)
1465     {
1466         for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1467             // for each category, count the length of the category name
1468             // plus 9 =
1469             // 2 for <>
1470             // 1 for -
1471             // 6 for most hex digits per code point
1472             int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1473             if (length > maxlength) {
1474                 maxlength = length;
1475             }
1476         }
1477         return maxlength;
1478     }
1479 
1480     /**
1481      * Adds names of a group to the argument set.
1482      * Equivalent to calcNameSetLength.
1483      * @param offset of the group name string in byte count
1484      * @param length of the group name string
1485      * @param tokenlength array to store the length of each token
1486      * @param set to add to
1487      * @return the length of the name string and the length of the group
1488      *         string parsed
1489      */
addGroupName(int offset, int length, byte tokenlength[], int set[])1490     private int[] addGroupName(int offset, int length, byte tokenlength[],
1491                                int set[])
1492     {
1493         int resultnlength = 0;
1494         int resultplength = 0;
1495         while (resultplength < length) {
1496             char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1497             resultplength ++;
1498             if (b == ';') {
1499                 break;
1500             }
1501 
1502             if (b >= m_tokentable_.length) {
1503                 add(set, b); // implicit letter
1504                 resultnlength ++;
1505             }
1506             else {
1507                 char token = m_tokentable_[b & 0x00ff];
1508                 if (token == 0xFFFE) {
1509                     // this is a lead byte for a double-byte token
1510                     b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1511                                          & 0x00ff));
1512                     token = m_tokentable_[b];
1513                     resultplength ++;
1514                 }
1515                 if (token == 0xFFFF) {
1516                     add(set, b);
1517                     resultnlength ++;
1518                 }
1519                 else {
1520                     // count token word
1521                     // use cached token length
1522                     byte tlength = tokenlength[b];
1523                     if (tlength == 0) {
1524                         synchronized (m_utilStringBuffer_) {
1525                             m_utilStringBuffer_.delete(0,
1526                                                  m_utilStringBuffer_.length());
1527                             UCharacterUtility.getNullTermByteSubString(
1528                                            m_utilStringBuffer_, m_tokenstring_,
1529                                            token);
1530                             tlength = (byte)add(set, m_utilStringBuffer_);
1531                         }
1532                         tokenlength[b] = tlength;
1533                     }
1534                     resultnlength += tlength;
1535                 }
1536             }
1537         }
1538         m_utilIntBuffer_[0] = resultnlength;
1539         m_utilIntBuffer_[1] = resultplength;
1540         return m_utilIntBuffer_;
1541     }
1542 
1543     /**
1544      * Adds names of all group to the argument set.
1545      * Sets the data member m_max*Length_.
1546      * Method called only once.
1547      * Equivalent to calcGroupNameSetsLength.
1548      * @param maxlength length to compare to
1549      */
addGroupName(int maxlength)1550     private void addGroupName(int maxlength)
1551     {
1552         int maxisolength = 0;
1553         char offsets[] = new char[LINES_PER_GROUP_ + 2];
1554         char lengths[] = new char[LINES_PER_GROUP_ + 2];
1555         byte tokenlengths[] = new byte[m_tokentable_.length];
1556 
1557         // enumerate all groups
1558         // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1559         for (int i = 0; i < m_groupcount_ ; i ++) {
1560             int offset = getGroupLengths(i, offsets, lengths);
1561             // enumerate all lines in each group
1562             // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1563             //    linenumber --) {
1564             for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1565                 linenumber ++) {
1566                 int lineoffset = offset + offsets[linenumber];
1567                 int length = lengths[linenumber];
1568                 if (length == 0) {
1569                     continue;
1570                 }
1571 
1572                 // read regular name
1573                 int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1574                                             m_nameSet_);
1575                 if (parsed[0] > maxlength) {
1576                     // 0 for name length
1577                     maxlength = parsed[0];
1578                 }
1579                 lineoffset += parsed[1];
1580                 if (parsed[1] >= length) {
1581                     // 1 for parsed group string length
1582                     continue;
1583                 }
1584                 length -= parsed[1];
1585                 // read Unicode 1.0 name
1586                 parsed = addGroupName(lineoffset, length, tokenlengths,
1587                                       m_nameSet_);
1588                 if (parsed[0] > maxlength) {
1589                     // 0 for name length
1590                     maxlength = parsed[0];
1591                 }
1592                 lineoffset += parsed[1];
1593                 if (parsed[1] >= length) {
1594                     // 1 for parsed group string length
1595                     continue;
1596                 }
1597                 length -= parsed[1];
1598                 // read ISO comment
1599                 parsed = addGroupName(lineoffset, length, tokenlengths,
1600                                       m_ISOCommentSet_);
1601                 if (parsed[1] > maxisolength) {
1602                     maxisolength = length;
1603                 }
1604             }
1605         }
1606 
1607         // set gMax... - name length last for threading
1608         m_maxISOCommentLength_ = maxisolength;
1609         m_maxNameLength_ = maxlength;
1610     }
1611 
1612     /**
1613      * Sets up the name sets and the calculation of the maximum lengths.
1614      * Equivalent to calcNameSetsLengths.
1615      */
initNameSetsLengths()1616     private boolean initNameSetsLengths()
1617     {
1618         if (m_maxNameLength_ > 0) {
1619             return true;
1620         }
1621 
1622         String extra = "0123456789ABCDEF<>-";
1623         // set hex digits, used in various names, and <>-, used in extended
1624         // names
1625         for (int i = extra.length() - 1; i >= 0; i --) {
1626             add(m_nameSet_, extra.charAt(i));
1627         }
1628 
1629         // set sets and lengths from algorithmic names
1630         m_maxNameLength_ = addAlgorithmName(0);
1631         // set sets and lengths from extended names
1632         m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1633         // set sets and lengths from group names, set global maximum values
1634         addGroupName(m_maxNameLength_);
1635         return true;
1636     }
1637 
1638     /**
1639      * Converts the char set cset into a Unicode set uset.
1640      * Equivalent to charSetToUSet.
1641      * @param set Set of 256 bit flags corresponding to a set of chars.
1642      * @param uset USet to receive characters. Existing contents are deleted.
1643      */
convert(int set[], UnicodeSet uset)1644     private void convert(int set[], UnicodeSet uset)
1645     {
1646         uset.clear();
1647         if (!initNameSetsLengths()) {
1648             return;
1649         }
1650 
1651         // build a char string with all chars that are used in character names
1652         for (char c = 255; c > 0; c --) {
1653             if (contains(set, c)) {
1654                 uset.add(c);
1655             }
1656         }
1657     }
1658 }
1659