• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2014, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.IOException;
14 import java.nio.ByteBuffer;
15 import java.util.Locale;
16 import java.util.MissingResourceException;
17 
18 import ohos.global.icu.lang.UCharacter;
19 import ohos.global.icu.lang.UCharacterCategory;
20 import ohos.global.icu.text.UTF16;
21 import ohos.global.icu.text.UnicodeSet;
22 
23 /**
24 * Internal class to manage character names.
* Since the data for names is stored
* in an array of char, by default the indexes used in this class refer to
* a 2 byte count, unless otherwise stated. In cases where the index refers
* to a byte count, the index is halved and, depending on whether the index is
* even or odd, the MSB or LSB of the char at the halved index is
* returned. For indexes into an array of int, the index is multiplied by 2, and
* the char at the multiplied index and its following char are returned as an
* int.
33 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class
34 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
35 * @author Syn Wee Quek
36 * @hide exposed on OHOS
37 */
38 
39 public final class UCharacterName
40 {
41     // public data members ----------------------------------------------
42 
43     /*
44      * public singleton instance
45      */
46     public static final UCharacterName INSTANCE;
47 
48     static {
49         try {
50             INSTANCE = new UCharacterName();
51         } catch (IOException e) {
52             ///CLOVER:OFF
53             throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","","");
54             ///CLOVER:ON
55         }
56     }
57 
58     /**
59     * Number of lines per group
60     * 1 << GROUP_SHIFT_
61     */
62     public static final int LINES_PER_GROUP_ = 1 << 5;
63     /**
64      * Maximum number of groups
65      */
66     public int m_groupcount_ = 0;
67 
68     // public methods ---------------------------------------------------
69 
70     /**
71     * Retrieve the name of a Unicode code point.
72     * Depending on <code>choice</code>, the character name written into the
73     * buffer is the "modern" name or the name that was defined in Unicode
74     * version 1.0.
75     * The name contains only "invariant" characters
76     * like A-Z, 0-9, space, and '-'.
77     *
78     * @param ch the code point for which to get the name.
79     * @param choice Selector for which name to get.
80     * @return if code point is above 0x1fff, null is returned
81     */
getName(int ch, int choice)82     public String getName(int ch, int choice)
83     {
84         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
85             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
86             return null;
87         }
88 
89         String result = null;
90 
91         result = getAlgName(ch, choice);
92 
93         // getting normal character name
94         if (result == null || result.length() == 0) {
95             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
96                 result = getExtendedName(ch);
97             } else {
98                 result = getGroupName(ch, choice);
99             }
100         }
101 
102         return result;
103     }
104 
105     /**
106     * Find a character by its name and return its code point value
107     * @param choice selector to indicate if argument name is a Unicode 1.0
108     *        or the most current version
109     * @param name the name to search for
110     * @return code point
111     */
getCharFromName(int choice, String name)112     public int getCharFromName(int choice, String name)
113     {
114         // checks for illegal arguments
115         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
116             name == null || name.length() == 0) {
117             return -1;
118         }
119 
120         // try extended names first
121         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
122         if (result >= -1) {
123             return result;
124         }
125 
126         String upperCaseName = name.toUpperCase(Locale.ENGLISH);
127         // try algorithmic names first, if fails then try group names
128         // int result = getAlgorithmChar(choice, uppercasename);
129 
130         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
131             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
132         ) {
133             int count = 0;
134             if (m_algorithm_ != null) {
135                 count = m_algorithm_.length;
136             }
137             for (count --; count >= 0; count --) {
138                 result = m_algorithm_[count].getChar(upperCaseName);
139                 if (result >= 0) {
140                     return result;
141                 }
142             }
143         }
144 
145         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
146             result = getGroupChar(upperCaseName,
147                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
148             if (result == -1) {
149                 result = getGroupChar(upperCaseName,
150                                       UCharacterNameChoice.CHAR_NAME_ALIAS);
151             }
152         }
153         else {
154             result = getGroupChar(upperCaseName, choice);
155         }
156         return result;
157     }
158 
159     // these are all UCharacterNameIterator use methods -------------------
160 
161     /**
162     * Reads a block of compressed lengths of 32 strings and expands them into
163     * offsets and lengths for each string. Lengths are stored with a
164     * variable-width encoding in consecutive nibbles:
165     * If a nibble<0xc, then it is the length itself (0 = empty string).
166     * If a nibble>=0xc, then it forms a length value with the following
167     * nibble.
168     * The offsets and lengths arrays must be at least 33 (one more) long
169     * because there is no check here at the end if the last nibble is still
170     * used.
171     * @param index of group string object in array
172     * @param offsets array to store the value of the string offsets
173     * @param lengths array to store the value of the string length
174     * @return next index of the data string immediately after the lengths
175     *         in terms of byte address
176     */
getGroupLengths(int index, char offsets[], char lengths[])177     public int getGroupLengths(int index, char offsets[], char lengths[])
178     {
179         char length = 0xffff;
180         byte b = 0,
181             n = 0;
182         int shift;
183         index = index * m_groupsize_; // byte count offsets of group strings
184         int stringoffset = UCharacterUtility.toInt(
185                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
186                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
187 
188         offsets[0] = 0;
189 
190         // all 32 lengths must be read to get the offset of the first group
191         // string
192         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
193             b = m_groupstring_[stringoffset];
194             shift = 4;
195 
196             while (shift >= 0) {
197                 // getting nibble
198                 n = (byte)((b >> shift) & 0x0F);
199                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
200                     length = (char)((n - 12) << 4);
201                 }
202                 else {
203                     if (length != 0xffff) {
204                        lengths[i] = (char)((length | n) + 12);
205                     }
206                     else {
207                        lengths[i] = (char)n;
208                     }
209 
210                     if (i < LINES_PER_GROUP_) {
211                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
212                     }
213 
214                     length = 0xffff;
215                     i ++;
216                 }
217 
218                 shift -= 4;
219             }
220         }
221         return stringoffset;
222     }
223 
224     /**
225     * Gets the name of the argument group index.
226     * UnicodeData.txt uses ';' as a field separator, so no field can contain
227     * ';' as part of its contents. In unames.icu, it is marked as
228     * token[';'] == -1 only if the semicolon is used in the data file - which
229     * is iff we have Unicode 1.0 names or ISO comments or aliases.
230     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
231     * although we know that it will never be part of a name.
232     * Equivalent to ICU4C's expandName.
233     * @param index of the group name string in byte count
234     * @param length of the group name string
235     * @param choice of Unicode 1.0 name or the most current name
236     * @return name of the group
237     */
getGroupName(int index, int length, int choice)238     public String getGroupName(int index, int length, int choice)
239     {
240         if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
241             choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
242         ) {
243             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
244                 /*
245                  * skip the modern name if it is not requested _and_
246                  * if the semicolon byte value is a character, not a token number
247                  */
248                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
249                 do {
250                     int oldindex = index;
251                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
252                                                        index, length, (byte)';');
253                     length -= (index - oldindex);
254                 } while(--fieldIndex>0);
255             }
256             else {
257                 // the semicolon byte is a token number, therefore only modern
258                 // names are stored in unames.dat and there is no such
259                 // requested alternate name here
260                 length = 0;
261             }
262         }
263 
264         synchronized (m_utilStringBuffer_) {
265             m_utilStringBuffer_.setLength(0);
266             byte b;
267             char token;
268             for (int i = 0; i < length;) {
269                 b = m_groupstring_[index + i];
270                 i ++;
271 
272                 if (b >= m_tokentable_.length) {
273                     if (b == ';') {
274                         break;
275                     }
276                     m_utilStringBuffer_.append(b); // implicit letter
277                 }
278                 else {
279                     token = m_tokentable_[b & 0x00ff];
280                     if (token == 0xFFFE) {
281                         // this is a lead byte for a double-byte token
282                         token = m_tokentable_[b << 8 |
283                                           (m_groupstring_[index + i] & 0x00ff)];
284                         i ++;
285                     }
286                     if (token == 0xFFFF) {
287                         if (b == ';') {
288                             // skip the semicolon if we are seeking extended
289                             // names and there was no 2.0 name but there
290                             // is a 1.0 name.
291                             if (m_utilStringBuffer_.length() == 0 && choice ==
292                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
293                                 continue;
294                             }
295                             break;
296                         }
297                         // explicit letter
298                         m_utilStringBuffer_.append((char)(b & 0x00ff));
299                     }
300                     else { // write token word
301                         UCharacterUtility.getNullTermByteSubString(
302                                 m_utilStringBuffer_, m_tokenstring_, token);
303                     }
304                 }
305             }
306 
307             if (m_utilStringBuffer_.length() > 0) {
308                 return m_utilStringBuffer_.toString();
309             }
310         }
311         return null;
312     }
313 
314     /**
315     * Retrieves the extended name
316     */
getExtendedName(int ch)317     public String getExtendedName(int ch)
318     {
319         String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
320         if (result == null) {
321             // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
322             result = getExtendedOr10Name(ch);
323         }
324         return result;
325     }
326 
327     /**
328      * Gets the group index for the codepoint, or the group before it.
329      * @param codepoint The codepoint index.
330      * @return group index containing codepoint or the group before it.
331      */
getGroup(int codepoint)332     public int getGroup(int codepoint)
333     {
334         int endGroup = m_groupcount_;
335         int msb      = getCodepointMSB(codepoint);
336         int result   = 0;
337         // binary search for the group of names that contains the one for
338         // code
339         // find the group that contains codepoint, or the highest before it
340         while (result < endGroup - 1) {
341             int gindex = (result + endGroup) >> 1;
342             if (msb < getGroupMSB(gindex)) {
343                 endGroup = gindex;
344             }
345             else {
346                 result = gindex;
347             }
348         }
349         return result;
350     }
351 
352     /**
353      * Gets the extended and 1.0 name when the most current unicode names
354      * fail
355      * @param ch codepoint
356      * @return name of codepoint extended or 1.0
357      */
getExtendedOr10Name(int ch)358     public String getExtendedOr10Name(int ch)
359     {
360         String result = null;
361         // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
362         if (result == null) {
363             int type = getType(ch);
364             // Return unknown if the table of names above is not up to
365             // date.
366             if (type >= TYPE_NAMES_.length) {
367                 result = UNKNOWN_TYPE_NAME_;
368             }
369             else {
370                 result = TYPE_NAMES_[type];
371             }
372             synchronized (m_utilStringBuffer_) {
373                 m_utilStringBuffer_.setLength(0);
374                 m_utilStringBuffer_.append('<');
375                 m_utilStringBuffer_.append(result);
376                 m_utilStringBuffer_.append('-');
377                 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
378                 int zeros = 4 - chStr.length();
379                 while (zeros > 0) {
380                     m_utilStringBuffer_.append('0');
381                     zeros --;
382                 }
383                 m_utilStringBuffer_.append(chStr);
384                 m_utilStringBuffer_.append('>');
385                 result = m_utilStringBuffer_.toString();
386             }
387         }
388         return result;
389     }
390 
391     /**
392      * Gets the MSB from the group index
393      * @param gindex group index
394      * @return the MSB of the group if gindex is valid, -1 otherwise
395      */
getGroupMSB(int gindex)396     public int getGroupMSB(int gindex)
397     {
398         if (gindex >= m_groupcount_) {
399             return -1;
400         }
401         return m_groupinfo_[gindex * m_groupsize_];
402     }
403 
404     /**
405      * Gets the MSB of the codepoint
406      * @param codepoint The codepoint value.
407      * @return the MSB of the codepoint
408      */
getCodepointMSB(int codepoint)409     public static int getCodepointMSB(int codepoint)
410     {
411         return codepoint >> GROUP_SHIFT_;
412     }
413 
414     /**
415      * Gets the maximum codepoint + 1 of the group
416      * @param msb most significant byte of the group
417      * @return limit codepoint of the group
418      */
getGroupLimit(int msb)419     public static int getGroupLimit(int msb)
420     {
421         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
422     }
423 
424     /**
425      * Gets the minimum codepoint of the group
426      * @param msb most significant byte of the group
427      * @return minimum codepoint of the group
428      */
getGroupMin(int msb)429     public static int getGroupMin(int msb)
430     {
431         return msb << GROUP_SHIFT_;
432     }
433 
434     /**
435      * Gets the offset to a group
436      * @param codepoint The codepoint value.
437      * @return offset to a group
438      */
getGroupOffset(int codepoint)439     public static int getGroupOffset(int codepoint)
440     {
441         return codepoint & GROUP_MASK_;
442     }
443 
444     /**
445      * Gets the minimum codepoint of a group
446      * @param codepoint The codepoint value.
447      * @return minimum codepoint in the group which codepoint belongs to
448      */
449     ///CLOVER:OFF
getGroupMinFromCodepoint(int codepoint)450     public static int getGroupMinFromCodepoint(int codepoint)
451     {
452         return codepoint & ~GROUP_MASK_;
453     }
454     ///CLOVER:ON
455 
456     /**
457      * Get the Algorithm range length
458      * @return Algorithm range length
459      */
getAlgorithmLength()460     public int getAlgorithmLength()
461     {
462         return m_algorithm_.length;
463     }
464 
465     /**
466      * Gets the start of the range
467      * @param index algorithm index
468      * @return algorithm range start
469      */
getAlgorithmStart(int index)470     public int getAlgorithmStart(int index)
471     {
472         return m_algorithm_[index].m_rangestart_;
473     }
474 
475     /**
476      * Gets the end of the range
477      * @param index algorithm index
478      * @return algorithm range end
479      */
getAlgorithmEnd(int index)480     public int getAlgorithmEnd(int index)
481     {
482         return m_algorithm_[index].m_rangeend_;
483     }
484 
485     /**
486      * Gets the Algorithmic name of the codepoint
487      * @param index algorithmic range index
488      * @param codepoint The codepoint value.
489      * @return algorithmic name of codepoint
490      */
getAlgorithmName(int index, int codepoint)491     public String getAlgorithmName(int index, int codepoint)
492     {
493         String result = null;
494         synchronized (m_utilStringBuffer_) {
495             m_utilStringBuffer_.setLength(0);
496             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
497             result = m_utilStringBuffer_.toString();
498         }
499         return result;
500     }
501 
502     /**
503     * Gets the group name of the character
504     * @param ch character to get the group name
505     * @param choice name choice selector to choose a unicode 1.0 or newer name
506     */
getGroupName(int ch, int choice)507     public synchronized String getGroupName(int ch, int choice)
508     {
509         // gets the msb
510         int msb   = getCodepointMSB(ch);
511         int group = getGroup(ch);
512 
513         // return this if it is an exact match
514         if (msb == m_groupinfo_[group * m_groupsize_]) {
515             int index = getGroupLengths(group, m_groupoffsets_,
516                                         m_grouplengths_);
517             int offset = ch & GROUP_MASK_;
518             return getGroupName(index + m_groupoffsets_[offset],
519                                 m_grouplengths_[offset], choice);
520         }
521 
522         return null;
523     }
524 
525     // these are transliterator use methods ---------------------------------
526 
527     /**
528      * Gets the maximum length of any codepoint name.
529      * Equivalent to uprv_getMaxCharNameLength.
530      * @return the maximum length of any codepoint name
531      */
getMaxCharNameLength()532     public int getMaxCharNameLength()
533     {
534         if (initNameSetsLengths()) {
535             return m_maxNameLength_;
536         }
537         else {
538             return 0;
539         }
540     }
541 
542     /**
543      * Gets the maximum length of any iso comments.
544      * Equivalent to uprv_getMaxISOCommentLength.
545      * @return the maximum length of any codepoint name
546      */
547     ///CLOVER:OFF
getMaxISOCommentLength()548     public int getMaxISOCommentLength()
549     {
550         if (initNameSetsLengths()) {
551             return m_maxISOCommentLength_;
552         }
553         else {
554             return 0;
555         }
556     }
557     ///CLOVER:ON
558 
559     /**
560      * Fills set with characters that are used in Unicode character names.
561      * Equivalent to uprv_getCharNameCharacters.
562      * @param set USet to receive characters. Existing contents are deleted.
563      */
getCharNameCharacters(UnicodeSet set)564     public void getCharNameCharacters(UnicodeSet set)
565     {
566         convert(m_nameSet_, set);
567     }
568 
569     /**
570      * Fills set with characters that are used in Unicode character names.
571      * Equivalent to uprv_getISOCommentCharacters.
572      * @param set USet to receive characters. Existing contents are deleted.
573      */
574     ///CLOVER:OFF
getISOCommentCharacters(UnicodeSet set)575     public void getISOCommentCharacters(UnicodeSet set)
576     {
577         convert(m_ISOCommentSet_, set);
578     }
579     ///CLOVER:ON
580 
581     // package private inner class --------------------------------------
582 
583     /**
584     * Algorithmic name class
585     */
586     static final class AlgorithmName
587     {
588         // package private data members ----------------------------------
589 
        /**
         * Constant type values of the different AlgorithmName kinds; these
         * are the only values setInfo accepts.
         * TYPE_0_: the name is the prefix followed by hex digits.
         * TYPE_1_: the name is the prefix followed by factorized elements.
         */
        static final int TYPE_0_ = 0;
        static final int TYPE_1_ = 1;

        // package private constructors ----------------------------------

        /**
         * Constructor. Creates an empty object; its data is supplied
         * afterwards through setInfo, setFactor, setPrefix and
         * setFactorString.
         */
        AlgorithmName()
        {
        }
604 
605         // package private methods ---------------------------------------
606 
607         /**
608         * Sets the information for accessing the algorithmic names
609         * @param rangestart starting code point that lies within this name group
610         * @param rangeend end code point that lies within this name group
611         * @param type algorithm type. There's 2 kinds of algorithmic type. First
612         *        which uses code point as part of its name and the other uses
613         *        variant postfix strings
614         * @param variant algorithmic variant
615         * @return true if values are valid
616         */
setInfo(int rangestart, int rangeend, byte type, byte variant)617         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
618         {
619             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
620                 && rangeend <= UCharacter.MAX_VALUE &&
621                 (type == TYPE_0_ || type == TYPE_1_)) {
622                 m_rangestart_ = rangestart;
623                 m_rangeend_ = rangeend;
624                 m_type_ = type;
625                 m_variant_ = variant;
626                 return true;
627             }
628             return false;
629         }
630 
631         /**
632         * Sets the factor data
633         * @param factor Array of factor
634         * @return true if factors are valid
635         */
setFactor(char factor[])636         boolean setFactor(char factor[])
637         {
638             if (factor.length == m_variant_) {
639                 m_factor_ = factor;
640                 return true;
641             }
642             return false;
643         }
644 
645         /**
646         * Sets the name prefix
647         * @param prefix
648         * @return true if prefix is set
649         */
setPrefix(String prefix)650         boolean setPrefix(String prefix)
651         {
652             if (prefix != null && prefix.length() > 0) {
653                 m_prefix_ = prefix;
654                 return true;
655             }
656             return false;
657         }
658 
659         /**
660         * Sets the variant factorized name data
661         * @param string variant factorized name data
662         * @return true if values are set
663         */
setFactorString(byte string[])664         boolean setFactorString(byte string[])
665         {
666             // factor and variant string can be empty for things like
667             // hanggul code points
668             m_factorstring_ = string;
669             return true;
670         }
671 
672         /**
673         * Checks if code point lies in Algorithm object at index
674         * @param ch code point
675         */
contains(int ch)676         boolean contains(int ch)
677         {
678             return m_rangestart_ <= ch && ch <= m_rangeend_;
679         }
680 
681         /**
682         * Appends algorithm name of code point into StringBuffer.
683         * Note this method does not check for validity of code point in Algorithm,
684         * result is undefined if code point does not belong in Algorithm.
685         * @param ch code point
686         * @param str StringBuffer to append to
687         */
appendName(int ch, StringBuffer str)688         void appendName(int ch, StringBuffer str)
689         {
690             str.append(m_prefix_);
691             switch (m_type_)
692             {
693                 case TYPE_0_:
694                     // prefix followed by hex digits indicating variants
695                 str.append(Utility.hex(ch,m_variant_));
696                     break;
697                 case TYPE_1_:
698                     // prefix followed by factorized-elements
699                     int offset = ch - m_rangestart_;
700                     int indexes[] = m_utilIntBuffer_;
701                     int factor;
702 
703                     // write elements according to the factors
704                     // the factorized elements are determined by modulo
705                     // arithmetic
706                     synchronized (m_utilIntBuffer_) {
707                         for (int i = m_variant_ - 1; i > 0; i --)
708                         {
709                             factor = m_factor_[i] & 0x00FF;
710                             indexes[i] = offset % factor;
711                             offset /= factor;
712                         }
713 
714                         // we don't need to calculate the last modulus because
715                         // start <= code <= end guarantees here that
716                         // code <= factors[0]
717                         indexes[0] = offset;
718 
719                         // joining up the factorized strings
720                         str.append(getFactorString(indexes, m_variant_));
721                     }
722                     break;
723             }
724         }
725 
726         /**
727         * Gets the character for the argument algorithmic name
728         * @return the algorithmic char or -1 otherwise.
729         */
getChar(String name)730         int getChar(String name)
731         {
732             int prefixlen = m_prefix_.length();
733             if (name.length() < prefixlen ||
734                 !m_prefix_.equals(name.substring(0, prefixlen))) {
735                 return -1;
736             }
737 
738             switch (m_type_)
739             {
740                 case TYPE_0_ :
741                 try
742                 {
743                     int result = Integer.parseInt(name.substring(prefixlen),
744                                                   16);
745                     // does it fit into the range?
746                     if (m_rangestart_ <= result && result <= m_rangeend_) {
747                         return result;
748                     }
749                 }
750                 catch (NumberFormatException e)
751                 {
752                     return -1;
753                 }
754                 break;
755                 case TYPE_1_ :
756                     // repetitative suffix name comparison done here
757                     // offset is the character code - start
758                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
759                     {
760                         int offset = ch - m_rangestart_;
761                         int indexes[] = m_utilIntBuffer_;
762                         int factor;
763 
764                         // write elements according to the factors
765                         // the factorized elements are determined by modulo
766                         // arithmetic
767                         synchronized (m_utilIntBuffer_) {
768                             for (int i = m_variant_ - 1; i > 0; i --)
769                             {
770                                 factor = m_factor_[i] & 0x00FF;
771                                 indexes[i] = offset % factor;
772                                 offset /= factor;
773                             }
774 
775                             // we don't need to calculate the last modulus
776                             // because start <= code <= end guarantees here that
777                             // code <= factors[0]
778                             indexes[0] = offset;
779 
780                             // joining up the factorized strings
781                             if (compareFactorString(indexes, m_variant_, name,
782                                                     prefixlen)) {
783                                 return ch;
784                             }
785                         }
786                     }
787             }
788 
789             return -1;
790         }
791 
792         /**
793          * Adds all chars in the set of algorithmic names into the set.
794          * Equivalent to part of calcAlgNameSetsLengths.
795          * @param set int set to add the chars of the algorithm names into
796          * @param maxlength maximum length to compare to
797          * @return the length that is either maxlength of the length of this
798          *         algorithm name if it is longer than maxlength
799          */
add(int set[], int maxlength)800         int add(int set[], int maxlength)
801         {
802             // prefix length
803             int length = UCharacterName.add(set, m_prefix_);
804             switch (m_type_) {
805                 case TYPE_0_ : {
806                     // name = prefix + (range->variant times) hex-digits
807                     // prefix
808                     length += m_variant_;
809                     /* synwee to check
810                      * addString(set, (const char *)(range + 1))
811                                        + range->variant;*/
812                     break;
813                 }
814                 case TYPE_1_ : {
815                     // name = prefix factorized-elements
816                     // get the set and maximum factor suffix length for each
817                     // factor
818                     for (int i = m_variant_ - 1; i > 0; i --)
819                     {
820                         int maxfactorlength = 0;
821                         int count = 0;
822                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
823                             synchronized (m_utilStringBuffer_) {
824                                 m_utilStringBuffer_.setLength(0);
825                                 count
826                                   = UCharacterUtility.getNullTermByteSubString(
827                                                 m_utilStringBuffer_,
828                                                 m_factorstring_, count);
829                                 UCharacterName.add(set, m_utilStringBuffer_);
830                                 if (m_utilStringBuffer_.length()
831                                                             > maxfactorlength)
832                                 {
833                                     maxfactorlength
834                                                 = m_utilStringBuffer_.length();
835                                 }
836                             }
837                         }
838                         length += maxfactorlength;
839                     }
840                 }
841             }
842             if (length > maxlength) {
843                 return length;
844             }
845             return maxlength;
846         }
847 
848         // private data members ------------------------------------------
849 
850         /**
851         * Algorithmic data information
852         */
853         private int m_rangestart_;
854         private int m_rangeend_;
855         private byte m_type_;
856         private byte m_variant_;
857         private char m_factor_[];
858         private String m_prefix_;
859         private byte m_factorstring_[];
860         /**
861          * Utility StringBuffer
862          */
863         private StringBuffer m_utilStringBuffer_ = new StringBuffer();
864         /**
865          * Utility int buffer
866          */
867         private int m_utilIntBuffer_[] = new int[256];
868 
869         // private methods -----------------------------------------------
870 
871         /**
872         * Gets the indexth string in each of the argument factor block
873         * @param index array with each index corresponding to each factor block
874         * @param length length of the array index
875         * @return the combined string of the array of indexth factor string in
876         *         factor block
877         */
getFactorString(int index[], int length)878         private String getFactorString(int index[], int length)
879         {
880             int size = m_factor_.length;
881             if (index == null || length != size) {
882                 return null;
883             }
884 
885             synchronized (m_utilStringBuffer_) {
886                 m_utilStringBuffer_.setLength(0);
887                 int count = 0;
888                 int factor;
889                 size --;
890                 for (int i = 0; i <= size; i ++) {
891                     factor = m_factor_[i];
892                     count = UCharacterUtility.skipNullTermByteSubString(
893                                              m_factorstring_, count, index[i]);
894                     count = UCharacterUtility.getNullTermByteSubString(
895                                           m_utilStringBuffer_, m_factorstring_,
896                                           count);
897                     if (i != size) {
898                         count = UCharacterUtility.skipNullTermByteSubString(
899                                                        m_factorstring_, count,
900                                                        factor - index[i] - 1);
901                     }
902                 }
903                 return m_utilStringBuffer_.toString();
904             }
905         }
906 
907         /**
908         * Compares the indexth string in each of the argument factor block with
909         * the argument string
910         * @param index array with each index corresponding to each factor block
911         * @param length index array length
912         * @param str string to compare with
913         * @param offset of str to start comparison
914         * @return true if string matches
915         */
compareFactorString(int index[], int length, String str, int offset)916         private boolean compareFactorString(int index[], int length, String str,
917                                             int offset)
918         {
919             int size = m_factor_.length;
920             if (index == null || length != size)
921                 return false;
922 
923             int count = 0;
924             int strcount = offset;
925             int factor;
926             size --;
927             for (int i = 0; i <= size; i ++)
928             {
929                 factor = m_factor_[i];
930                 count = UCharacterUtility.skipNullTermByteSubString(
931                                           m_factorstring_, count, index[i]);
932                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
933                                           m_factorstring_, strcount, count);
934                 if (strcount < 0) {
935                     return false;
936                 }
937 
938                 if (i != size) {
939                     count = UCharacterUtility.skipNullTermByteSubString(
940                                   m_factorstring_, count, factor - index[i]);
941                 }
942             }
943             if (strcount != str.length()) {
944                 return false;
945             }
946             return true;
947         }
948     }
949 
950     // package private data members --------------------------------------
951 
    /**
     * Size of each group, assigned by setGroupCountSize (which documents it
     * as the size of a group in char); 0 until then
     */
    int m_groupsize_ = 0;
956 
957     // package private methods --------------------------------------------
958 
959     /**
960     * Sets the token data
961     * @param token array of tokens
962     * @param tokenstring array of string values of the tokens
963     * @return false if there is a data error
964     */
setToken(char token[], byte tokenstring[])965     boolean setToken(char token[], byte tokenstring[])
966     {
967         if (token != null && tokenstring != null && token.length > 0 &&
968             tokenstring.length > 0) {
969             m_tokentable_ = token;
970             m_tokenstring_ = tokenstring;
971             return true;
972         }
973         return false;
974     }
975 
976     /**
977     * Set the algorithm name information array
978     * @param alg Algorithm information array
979     * @return true if the group string offset has been set correctly
980     */
setAlgorithm(AlgorithmName alg[])981     boolean setAlgorithm(AlgorithmName alg[])
982     {
983         if (alg != null && alg.length != 0) {
984             m_algorithm_ = alg;
985             return true;
986         }
987         return false;
988     }
989 
990     /**
991     * Sets the number of group and size of each group in number of char
992     * @param count number of groups
993     * @param size size of group in char
994     * @return true if group size is set correctly
995     */
setGroupCountSize(int count, int size)996     boolean setGroupCountSize(int count, int size)
997     {
998         if (count <= 0 || size <= 0) {
999             return false;
1000         }
1001         m_groupcount_ = count;
1002         m_groupsize_ = size;
1003         return true;
1004     }
1005 
1006     /**
1007     * Sets the group name data
1008     * @param group index information array
1009     * @param groupstring name information array
1010     * @return false if there is a data error
1011     */
setGroup(char group[], byte groupstring[])1012     boolean setGroup(char group[], byte groupstring[])
1013     {
1014         if (group != null && groupstring != null && group.length > 0 &&
1015             groupstring.length > 0) {
1016             m_groupinfo_ = group;
1017             m_groupstring_ = groupstring;
1018             return true;
1019         }
1020         return false;
1021     }
1022 
1023     // private data members ----------------------------------------------
1024 
1025     /**
1026     * Data used in unames.icu
1027     */
1028     private char m_tokentable_[];
1029     private byte m_tokenstring_[];
1030     private char m_groupinfo_[];
1031     private byte m_groupstring_[];
1032     private AlgorithmName m_algorithm_[];
1033 
1034     /**
1035     * Group use.  Note - access must be synchronized.
1036     */
1037     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1038     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1039 
1040     /**
1041     * Default name of the name datafile
1042     */
1043     private static final String FILE_NAME_ = "unames.icu";
1044     /**
1045     * Shift count to retrieve group information
1046     */
1047     private static final int GROUP_SHIFT_ = 5;
1048     /**
1049     * Mask to retrieve the offset for a particular character within a group
1050     */
1051     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1052 
1053     /**
1054     * Position of offsethigh in group information array
1055     */
1056     private static final int OFFSET_HIGH_OFFSET_ = 1;
1057 
1058     /**
1059     * Position of offsetlow in group information array
1060     */
1061     private static final int OFFSET_LOW_OFFSET_ = 2;
1062     /**
1063     * Double nibble indicator, any nibble > this number has to be combined
1064     * with its following nibble
1065     */
1066     private static final int SINGLE_NIBBLE_MAX_ = 11;
1067 
1068     /*
1069      * Maximum length of character names (regular & 1.0).
1070      */
1071     //private static int MAX_NAME_LENGTH_ = 0;
1072     /*
1073      * Maximum length of ISO comments.
1074      */
1075     //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1076 
1077     /**
1078      * Set of chars used in character names (regular & 1.0).
1079      * Chars are platform-dependent (can be EBCDIC).
1080      */
1081     private int m_nameSet_[] = new int[8];
1082     /**
1083      * Set of chars used in ISO comments. (regular & 1.0).
1084      * Chars are platform-dependent (can be EBCDIC).
1085      */
1086     private int m_ISOCommentSet_[] = new int[8];
1087     /**
1088      * Utility StringBuffer
1089      */
1090     private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1091     /**
1092      * Utility int buffer
1093      */
1094     private int m_utilIntBuffer_[] = new int[2];
1095     /**
1096      * Maximum ISO comment length
1097      */
1098     private int m_maxISOCommentLength_;
1099     /**
1100      * Maximum name length
1101      */
1102     private int m_maxNameLength_;
1103     /**
1104      * Type names used for extended names
1105      */
1106     private static final String TYPE_NAMES_[] = {"unassigned",
1107                                                  "uppercase letter",
1108                                                  "lowercase letter",
1109                                                  "titlecase letter",
1110                                                  "modifier letter",
1111                                                  "other letter",
1112                                                  "non spacing mark",
1113                                                  "enclosing mark",
1114                                                  "combining spacing mark",
1115                                                  "decimal digit number",
1116                                                  "letter number",
1117                                                  "other number",
1118                                                  "space separator",
1119                                                  "line separator",
1120                                                  "paragraph separator",
1121                                                  "control",
1122                                                  "format",
1123                                                  "private use area",
1124                                                  "surrogate",
1125                                                  "dash punctuation",
1126                                                  "start punctuation",
1127                                                  "end punctuation",
1128                                                  "connector punctuation",
1129                                                  "other punctuation",
1130                                                  "math symbol",
1131                                                  "currency symbol",
1132                                                  "modifier symbol",
1133                                                  "other symbol",
1134                                                  "initial punctuation",
1135                                                  "final punctuation",
1136                                                  "noncharacter",
1137                                                  "lead surrogate",
1138                                                  "trail surrogate"};
1139     /**
1140      * Unknown type name
1141      */
1142     private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1143     /**
1144      * Not a character type
1145      */
1146     private static final int NON_CHARACTER_
1147                                     = UCharacterCategory.CHAR_CATEGORY_COUNT;
1148     /**
1149     * Lead surrogate type
1150     */
1151     private static final int LEAD_SURROGATE_
1152                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1153     /**
1154     * Trail surrogate type
1155     */
1156     private static final int TRAIL_SURROGATE_
1157                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1158     /**
1159     * Extended category count
1160     */
1161     static final int EXTENDED_CATEGORY_
1162                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1163 
1164     // private constructor ------------------------------------------------
1165 
1166     /**
1167     * <p>Protected constructor for use in UCharacter.</p>
1168     * @exception IOException thrown when data reading fails
1169     */
UCharacterName()1170     private UCharacterName() throws IOException
1171     {
1172         ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
1173         UCharacterNameReader reader = new UCharacterNameReader(b);
1174         reader.read(this);
1175     }
1176 
1177     // private methods ---------------------------------------------------
1178 
1179     /**
1180     * Gets the algorithmic name for the argument character
1181     * @param ch character to determine name for
1182     * @param choice name choice
1183     * @return the algorithmic name or null if not found
1184     */
getAlgName(int ch, int choice)1185     private String getAlgName(int ch, int choice)
1186     {
1187         /* Only the normative character name can be algorithmic. */
1188         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
1189             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
1190         ) {
1191             // index in terms integer index
1192             synchronized (m_utilStringBuffer_) {
1193                 m_utilStringBuffer_.setLength(0);
1194 
1195                 for (int index = m_algorithm_.length - 1; index >= 0; index --)
1196                 {
1197                    if (m_algorithm_[index].contains(ch)) {
1198                       m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1199                       return m_utilStringBuffer_.toString();
1200                    }
1201                 }
1202             }
1203         }
1204         return null;
1205     }
1206 
1207     /**
1208     * Getting the character with the tokenized argument name
1209     * @param name of the character
1210     * @return character with the tokenized argument name or -1 if character
1211     *         is not found
1212     */
getGroupChar(String name, int choice)1213     private synchronized int getGroupChar(String name, int choice)
1214     {
1215         for (int i = 0; i < m_groupcount_; i ++) {
1216             // populating the data set of grouptable
1217 
1218             int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1219                                                   m_grouplengths_);
1220 
1221             // shift out to function
1222             int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1223                                       choice);
1224             if (result != -1) {
1225                 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1226                          | result;
1227             }
1228         }
1229         return -1;
1230     }
1231 
1232     /**
1233     * Compares and retrieve character if name is found within the argument
1234     * group
1235     * @param index index where the set of names reside in the group block
1236     * @param length list of lengths of the strings
1237     * @param name character name to search for
1238     * @param choice of either 1.0 or the most current unicode name
1239     * @return relative character in the group which matches name, otherwise if
1240     *         not found, -1 will be returned
1241     */
getGroupChar(int index, char length[], String name, int choice)1242     private int getGroupChar(int index, char length[], String name,
1243                              int choice)
1244     {
1245         byte b = 0;
1246         char token;
1247         int len;
1248         int namelen = name.length();
1249         int nindex;
1250         int count;
1251 
1252         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1253             nindex = 0;
1254             len = length[result];
1255 
1256             if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
1257                 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
1258             ) {
1259                 /*
1260                  * skip the modern name if it is not requested _and_
1261                  * if the semicolon byte value is a character, not a token number
1262                  */
1263                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
1264                 do {
1265                     int oldindex = index;
1266                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
1267                                                          index, len, (byte)';');
1268                     len -= (index - oldindex);
1269                 } while(--fieldIndex>0);
1270             }
1271 
1272             // number of tokens is > the length of the name
1273             // write each letter directly, and write a token word per token
1274             for (count = 0; count < len && nindex != -1 && nindex < namelen;
1275                 ) {
1276                 b = m_groupstring_[index + count];
1277                 count ++;
1278 
1279                 if (b >= m_tokentable_.length) {
1280                     if (name.charAt(nindex ++) != (b & 0xFF)) {
1281                         nindex = -1;
1282                     }
1283                 }
1284                 else {
1285                     token = m_tokentable_[b & 0xFF];
1286                     if (token == 0xFFFE) {
1287                         // this is a lead byte for a double-byte token
1288                         token = m_tokentable_[b << 8 |
1289                                    (m_groupstring_[index + count] & 0x00ff)];
1290                         count ++;
1291                     }
1292                     if (token == 0xFFFF) {
1293                         if (name.charAt(nindex ++) != (b & 0xFF)) {
1294                             nindex = -1;
1295                         }
1296                     }
1297                     else {
1298                         // compare token with name
1299                         nindex = UCharacterUtility.compareNullTermByteSubString(
1300                                         name, m_tokenstring_, nindex, token);
1301                     }
1302                 }
1303             }
1304 
1305             if (namelen == nindex &&
1306                 (count == len || m_groupstring_[index + count] == ';')) {
1307                 return result;
1308             }
1309 
1310             index += len;
1311         }
1312         return -1;
1313     }
1314 
1315     /**
1316     * Gets the character extended type
1317     * @param ch character to be tested
1318     * @return extended type it is associated with
1319     */
getType(int ch)1320     private static int getType(int ch)
1321     {
1322         if (UCharacterUtility.isNonCharacter(ch)) {
1323             // not a character we return a invalid category count
1324             return NON_CHARACTER_;
1325         }
1326         int result = UCharacter.getType(ch);
1327         if (result == UCharacterCategory.SURROGATE) {
1328             if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1329                 result = LEAD_SURROGATE_;
1330             }
1331             else {
1332                 result = TRAIL_SURROGATE_;
1333             }
1334         }
1335         return result;
1336     }
1337 
1338     /**
1339     * Getting the character with extended name of the form <....>.
1340     * @param name of the character to be found
1341     * @param choice name choice
1342     * @return character associated with the name, -1 if such character is not
1343     *                   found and -2 if we should continue with the search.
1344     */
getExtendedChar(String name, int choice)1345     private static int getExtendedChar(String name, int choice)
1346     {
1347         if (name.charAt(0) == '<') {
1348             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1349                 int endIndex = name.length() - 1;
1350                 if (name.charAt(endIndex) == '>') {
1351                     int startIndex = name.lastIndexOf('-');
1352                     if (startIndex >= 0) { // We've got a category.
1353                         startIndex ++;
1354 
1355                         // There should be 1 to 8 hex digits.
1356                         int hexLength = endIndex - startIndex;
1357                         if (hexLength < 1 || 8 < hexLength) {
1358                             return -1;
1359                         }
1360                         int result = -1;
1361                         try {
1362                             result = Integer.parseInt(
1363                                         name.substring(startIndex, endIndex),
1364                                         16);
1365                         }
1366                         catch (NumberFormatException e) {
1367                             return -1;
1368                         }
1369                         if (result < 0 || 0x10ffff < result) {
1370                             return -1;
1371                         }
1372                         // Now validate the category name. We could use a
1373                         // binary search, or a trie, if we really wanted to.
1374                         int charType = getType(result);
1375                         String type = name.substring(1, startIndex - 1);
1376                         int length = TYPE_NAMES_.length;
1377                         for (int i = 0; i < length; ++ i) {
1378                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1379                                 if (charType == i) {
1380                                     return result;
1381                                 }
1382                                 break;
1383                             }
1384                         }
1385                     }
1386                 }
1387             }
1388             return -1;
1389         }
1390         return -2;
1391     }
1392 
1393     // sets of name characters, maximum name lengths -----------------------
1394 
1395     /**
1396      * Adds a codepoint into a set of ints.
1397      * Equivalent to SET_ADD.
1398      * @param set set to add to
1399      * @param ch 16 bit char to add
1400      */
add(int set[], char ch)1401     private static void add(int set[], char ch)
1402     {
1403         set[ch >>> 5] |= 1 << (ch & 0x1f);
1404     }
1405 
1406     /**
1407      * Checks if a codepoint is a part of a set of ints.
1408      * Equivalent to SET_CONTAINS.
1409      * @param set set to check in
1410      * @param ch 16 bit char to check
1411      * @return true if codepoint is part of the set, false otherwise
1412      */
contains(int set[], char ch)1413     private static boolean contains(int set[], char ch)
1414     {
1415         return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1416     }
1417 
1418     /**
1419      * Adds all characters of the argument str and gets the length
1420      * Equivalent to calcStringSetLength.
1421      * @param set set to add all chars of str to
1422      * @param str string to add
1423      */
add(int set[], String str)1424     private static int add(int set[], String str)
1425     {
1426         int result = str.length();
1427 
1428         for (int i = result - 1; i >= 0; i --) {
1429             add(set, str.charAt(i));
1430         }
1431         return result;
1432     }
1433 
1434     /**
1435      * Adds all characters of the argument str and gets the length
1436      * Equivalent to calcStringSetLength.
1437      * @param set set to add all chars of str to
1438      * @param str string to add
1439      */
add(int set[], StringBuffer str)1440     private static int add(int set[], StringBuffer str)
1441     {
1442         int result = str.length();
1443 
1444         for (int i = result - 1; i >= 0; i --) {
1445             add(set, str.charAt(i));
1446         }
1447         return result;
1448     }
1449 
1450     /**
1451      * Adds all algorithmic names into the name set.
1452      * Equivalent to part of calcAlgNameSetsLengths.
1453      * @param maxlength length to compare to
1454      * @return the maximum length of any possible algorithmic name if it is >
1455      *         maxlength, otherwise maxlength is returned.
1456      */
addAlgorithmName(int maxlength)1457     private int addAlgorithmName(int maxlength)
1458     {
1459         int result = 0;
1460         for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1461             result = m_algorithm_[i].add(m_nameSet_, maxlength);
1462             if (result > maxlength) {
1463                 maxlength = result;
1464             }
1465         }
1466         return maxlength;
1467     }
1468 
1469     /**
1470      * Adds all extended names into the name set.
1471      * Equivalent to part of calcExtNameSetsLengths.
1472      * @param maxlength length to compare to
1473      * @return the maxlength of any possible extended name.
1474      */
addExtendedName(int maxlength)1475     private int addExtendedName(int maxlength)
1476     {
1477         for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1478             // for each category, count the length of the category name
1479             // plus 9 =
1480             // 2 for <>
1481             // 1 for -
1482             // 6 for most hex digits per code point
1483             int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1484             if (length > maxlength) {
1485                 maxlength = length;
1486             }
1487         }
1488         return maxlength;
1489     }
1490 
1491     /**
1492      * Adds names of a group to the argument set.
1493      * Equivalent to calcNameSetLength.
1494      * @param offset of the group name string in byte count
1495      * @param length of the group name string
1496      * @param tokenlength array to store the length of each token
1497      * @param set to add to
1498      * @return the length of the name string and the length of the group
1499      *         string parsed
1500      */
addGroupName(int offset, int length, byte tokenlength[], int set[])1501     private int[] addGroupName(int offset, int length, byte tokenlength[],
1502                                int set[])
1503     {
1504         int resultnlength = 0;
1505         int resultplength = 0;
1506         while (resultplength < length) {
1507             char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1508             resultplength ++;
1509             if (b == ';') {
1510                 break;
1511             }
1512 
1513             if (b >= m_tokentable_.length) {
1514                 add(set, b); // implicit letter
1515                 resultnlength ++;
1516             }
1517             else {
1518                 char token = m_tokentable_[b & 0x00ff];
1519                 if (token == 0xFFFE) {
1520                     // this is a lead byte for a double-byte token
1521                     b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1522                                          & 0x00ff));
1523                     token = m_tokentable_[b];
1524                     resultplength ++;
1525                 }
1526                 if (token == 0xFFFF) {
1527                     add(set, b);
1528                     resultnlength ++;
1529                 }
1530                 else {
1531                     // count token word
1532                     // use cached token length
1533                     byte tlength = tokenlength[b];
1534                     if (tlength == 0) {
1535                         synchronized (m_utilStringBuffer_) {
1536                             m_utilStringBuffer_.setLength(0);
1537                             UCharacterUtility.getNullTermByteSubString(
1538                                            m_utilStringBuffer_, m_tokenstring_,
1539                                            token);
1540                             tlength = (byte)add(set, m_utilStringBuffer_);
1541                         }
1542                         tokenlength[b] = tlength;
1543                     }
1544                     resultnlength += tlength;
1545                 }
1546             }
1547         }
1548         m_utilIntBuffer_[0] = resultnlength;
1549         m_utilIntBuffer_[1] = resultplength;
1550         return m_utilIntBuffer_;
1551     }
1552 
1553     /**
1554      * Adds names of all group to the argument set.
1555      * Sets the data member m_max*Length_.
1556      * Method called only once.
1557      * Equivalent to calcGroupNameSetsLength.
1558      * @param maxlength length to compare to
1559      */
addGroupName(int maxlength)1560     private void addGroupName(int maxlength)
1561     {
1562         int maxisolength = 0;
1563         char offsets[] = new char[LINES_PER_GROUP_ + 2];
1564         char lengths[] = new char[LINES_PER_GROUP_ + 2];
1565         byte tokenlengths[] = new byte[m_tokentable_.length];
1566 
1567         // enumerate all groups
1568         // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1569         for (int i = 0; i < m_groupcount_ ; i ++) {
1570             int offset = getGroupLengths(i, offsets, lengths);
1571             // enumerate all lines in each group
1572             // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1573             //    linenumber --) {
1574             for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1575                 linenumber ++) {
1576                 int lineoffset = offset + offsets[linenumber];
1577                 int length = lengths[linenumber];
1578                 if (length == 0) {
1579                     continue;
1580                 }
1581 
1582                 // read regular name
1583                 int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1584                                             m_nameSet_);
1585                 if (parsed[0] > maxlength) {
1586                     // 0 for name length
1587                     maxlength = parsed[0];
1588                 }
1589                 lineoffset += parsed[1];
1590                 if (parsed[1] >= length) {
1591                     // 1 for parsed group string length
1592                     continue;
1593                 }
1594                 length -= parsed[1];
1595                 // read Unicode 1.0 name
1596                 parsed = addGroupName(lineoffset, length, tokenlengths,
1597                                       m_nameSet_);
1598                 if (parsed[0] > maxlength) {
1599                     // 0 for name length
1600                     maxlength = parsed[0];
1601                 }
1602                 lineoffset += parsed[1];
1603                 if (parsed[1] >= length) {
1604                     // 1 for parsed group string length
1605                     continue;
1606                 }
1607                 length -= parsed[1];
1608                 // read ISO comment
1609                 parsed = addGroupName(lineoffset, length, tokenlengths,
1610                                       m_ISOCommentSet_);
1611                 if (parsed[1] > maxisolength) {
1612                     maxisolength = length;
1613                 }
1614             }
1615         }
1616 
1617         // set gMax... - name length last for threading
1618         m_maxISOCommentLength_ = maxisolength;
1619         m_maxNameLength_ = maxlength;
1620     }
1621 
1622     /**
1623      * Sets up the name sets and the calculation of the maximum lengths.
1624      * Equivalent to calcNameSetsLengths.
1625      */
initNameSetsLengths()1626     private boolean initNameSetsLengths()
1627     {
1628         if (m_maxNameLength_ > 0) {
1629             return true;
1630         }
1631 
1632         String extra = "0123456789ABCDEF<>-";
1633         // set hex digits, used in various names, and <>-, used in extended
1634         // names
1635         for (int i = extra.length() - 1; i >= 0; i --) {
1636             add(m_nameSet_, extra.charAt(i));
1637         }
1638 
1639         // set sets and lengths from algorithmic names
1640         m_maxNameLength_ = addAlgorithmName(0);
1641         // set sets and lengths from extended names
1642         m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1643         // set sets and lengths from group names, set global maximum values
1644         addGroupName(m_maxNameLength_);
1645         return true;
1646     }
1647 
1648     /**
1649      * Converts the char set cset into a Unicode set uset.
1650      * Equivalent to charSetToUSet.
1651      * @param set Set of 256 bit flags corresponding to a set of chars.
1652      * @param uset USet to receive characters. Existing contents are deleted.
1653      */
convert(int set[], UnicodeSet uset)1654     private void convert(int set[], UnicodeSet uset)
1655     {
1656         uset.clear();
1657         if (!initNameSetsLengths()) {
1658             return;
1659         }
1660 
1661         // build a char string with all chars that are used in character names
1662         for (char c = 255; c > 0; c --) {
1663             if (contains(set, c)) {
1664                 uset.add(c);
1665             }
1666         }
1667     }
1668 }
1669