• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 
8 package com.ibm.icu.impl;
9 
10 import java.io.IOException;
11 import java.nio.ByteBuffer;
12 import java.util.Locale;
13 import java.util.MissingResourceException;
14 
15 import com.ibm.icu.lang.UCharacter;
16 import com.ibm.icu.lang.UCharacterCategory;
17 import com.ibm.icu.text.UTF16;
18 import com.ibm.icu.text.UnicodeSet;
19 
20 /**
21 * Internal class to manage character names.
22 * Since the data for names is stored
23 * in an array of char, by default the indexes used in this class refer to
24 * a 2 byte count, unless otherwise stated. In cases where the index refers
25 * to a byte count, the index is halved and depending on whether the index is
26 * even or odd, the MSB or LSB of the result char at the halved index is
27 * returned. For indexes to an array of int, the index is multiplied by 2,
28 * result char at the multiplied index and its following char is returned as an
29 * int.
30 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class
31 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
32 * @author Syn Wee Quek
33 * @since nov0700
34 */
35 
36 public final class UCharacterName
37 {
38     // public data members ----------------------------------------------
39 
/**
 * Public singleton instance, created once when this class is initialized.
 * If construction fails with an IOException, class initialization fails
 * with a MissingResourceException that carries the IOException as its
 * cause.
 */
public static final UCharacterName INSTANCE;

static {
    try {
        INSTANCE = new UCharacterName();
    } catch (IOException e) {
        ///CLOVER:OFF
        // MissingResourceException has no constructor taking a cause, so
        // the underlying I/O failure is attached with initCause() to keep
        // it visible in the stack trace.
        MissingResourceException failure = new MissingResourceException(
            "Could not construct UCharacterName. Missing unames.icu", "", "");
        failure.initCause(e);
        throw failure;
        ///CLOVER:ON
    }
}
54 
55     /**
56     * Number of lines per group
57     * 1 << GROUP_SHIFT_
58     */
59     public static final int LINES_PER_GROUP_ = 1 << 5;
60     /**
61      * Maximum number of groups
62      */
63     public int m_groupcount_ = 0;
64 
65     // public methods ---------------------------------------------------
66 
67     /**
68     * Retrieve the name of a Unicode code point.
69     * Depending on <code>choice</code>, the character name written into the
70     * buffer is the "modern" name or the name that was defined in Unicode
71     * version 1.0.
72     * The name contains only "invariant" characters
73     * like A-Z, 0-9, space, and '-'.
74     *
75     * @param ch the code point for which to get the name.
76     * @param choice Selector for which name to get.
77     * @return if code point is above 0x1fff, null is returned
78     */
getName(int ch, int choice)79     public String getName(int ch, int choice)
80     {
81         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
82             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
83             return null;
84         }
85 
86         String result = null;
87 
88         result = getAlgName(ch, choice);
89 
90         // getting normal character name
91         if (result == null || result.length() == 0) {
92             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
93                 result = getExtendedName(ch);
94             } else {
95                 result = getGroupName(ch, choice);
96             }
97         }
98 
99         return result;
100     }
101 
102     /**
103     * Find a character by its name and return its code point value
104     * @param choice selector to indicate if argument name is a Unicode 1.0
105     *        or the most current version
106     * @param name the name to search for
107     * @return code point
108     */
getCharFromName(int choice, String name)109     public int getCharFromName(int choice, String name)
110     {
111         // checks for illegal arguments
112         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
113             name == null || name.length() == 0) {
114             return -1;
115         }
116 
117         // try extended names first
118         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
119         if (result >= -1) {
120             return result;
121         }
122 
123         String upperCaseName = name.toUpperCase(Locale.ENGLISH);
124         // try algorithmic names first, if fails then try group names
125         // int result = getAlgorithmChar(choice, uppercasename);
126 
127         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
128             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
129         ) {
130             int count = 0;
131             if (m_algorithm_ != null) {
132                 count = m_algorithm_.length;
133             }
134             for (count --; count >= 0; count --) {
135                 result = m_algorithm_[count].getChar(upperCaseName);
136                 if (result >= 0) {
137                     return result;
138                 }
139             }
140         }
141 
142         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
143             result = getGroupChar(upperCaseName,
144                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
145             if (result == -1) {
146                 result = getGroupChar(upperCaseName,
147                                       UCharacterNameChoice.CHAR_NAME_ALIAS);
148             }
149         }
150         else {
151             result = getGroupChar(upperCaseName, choice);
152         }
153         return result;
154     }
155 
156     // these are all UCharacterNameIterator use methods -------------------
157 
158     /**
159     * Reads a block of compressed lengths of 32 strings and expands them into
160     * offsets and lengths for each string. Lengths are stored with a
161     * variable-width encoding in consecutive nibbles:
162     * If a nibble<0xc, then it is the length itself (0 = empty string).
163     * If a nibble>=0xc, then it forms a length value with the following
164     * nibble.
165     * The offsets and lengths arrays must be at least 33 (one more) long
166     * because there is no check here at the end if the last nibble is still
167     * used.
168     * @param index of group string object in array
169     * @param offsets array to store the value of the string offsets
170     * @param lengths array to store the value of the string length
171     * @return next index of the data string immediately after the lengths
172     *         in terms of byte address
173     */
getGroupLengths(int index, char offsets[], char lengths[])174     public int getGroupLengths(int index, char offsets[], char lengths[])
175     {
176         char length = 0xffff;
177         byte b = 0,
178             n = 0;
179         int shift;
180         index = index * m_groupsize_; // byte count offsets of group strings
181         int stringoffset = UCharacterUtility.toInt(
182                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
183                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
184 
185         offsets[0] = 0;
186 
187         // all 32 lengths must be read to get the offset of the first group
188         // string
189         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
190             b = m_groupstring_[stringoffset];
191             shift = 4;
192 
193             while (shift >= 0) {
194                 // getting nibble
195                 n = (byte)((b >> shift) & 0x0F);
196                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
197                     length = (char)((n - 12) << 4);
198                 }
199                 else {
200                     if (length != 0xffff) {
201                        lengths[i] = (char)((length | n) + 12);
202                     }
203                     else {
204                        lengths[i] = (char)n;
205                     }
206 
207                     if (i < LINES_PER_GROUP_) {
208                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
209                     }
210 
211                     length = 0xffff;
212                     i ++;
213                 }
214 
215                 shift -= 4;
216             }
217         }
218         return stringoffset;
219     }
220 
221     /**
222     * Gets the name of the argument group index.
223     * UnicodeData.txt uses ';' as a field separator, so no field can contain
224     * ';' as part of its contents. In unames.icu, it is marked as
225     * token[';'] == -1 only if the semicolon is used in the data file - which
226     * is iff we have Unicode 1.0 names or ISO comments or aliases.
227     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
228     * although we know that it will never be part of a name.
229     * Equivalent to ICU4C's expandName.
230     * @param index of the group name string in byte count
231     * @param length of the group name string
232     * @param choice of Unicode 1.0 name or the most current name
233     * @return name of the group
234     */
getGroupName(int index, int length, int choice)235     public String getGroupName(int index, int length, int choice)
236     {
237         if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
238             choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
239         ) {
240             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
241                 /*
242                  * skip the modern name if it is not requested _and_
243                  * if the semicolon byte value is a character, not a token number
244                  */
245                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
246                 do {
247                     int oldindex = index;
248                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
249                                                        index, length, (byte)';');
250                     length -= (index - oldindex);
251                 } while(--fieldIndex>0);
252             }
253             else {
254                 // the semicolon byte is a token number, therefore only modern
255                 // names are stored in unames.dat and there is no such
256                 // requested alternate name here
257                 length = 0;
258             }
259         }
260 
261         synchronized (m_utilStringBuffer_) {
262             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
263             byte b;
264             char token;
265             for (int i = 0; i < length;) {
266                 b = m_groupstring_[index + i];
267                 i ++;
268 
269                 if (b >= m_tokentable_.length) {
270                     if (b == ';') {
271                         break;
272                     }
273                     m_utilStringBuffer_.append(b); // implicit letter
274                 }
275                 else {
276                     token = m_tokentable_[b & 0x00ff];
277                     if (token == 0xFFFE) {
278                         // this is a lead byte for a double-byte token
279                         token = m_tokentable_[b << 8 |
280                                           (m_groupstring_[index + i] & 0x00ff)];
281                         i ++;
282                     }
283                     if (token == 0xFFFF) {
284                         if (b == ';') {
285                             // skip the semicolon if we are seeking extended
286                             // names and there was no 2.0 name but there
287                             // is a 1.0 name.
288                             if (m_utilStringBuffer_.length() == 0 && choice ==
289                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
290                                 continue;
291                             }
292                             break;
293                         }
294                         // explicit letter
295                         m_utilStringBuffer_.append((char)(b & 0x00ff));
296                     }
297                     else { // write token word
298                         UCharacterUtility.getNullTermByteSubString(
299                                 m_utilStringBuffer_, m_tokenstring_, token);
300                     }
301                 }
302             }
303 
304             if (m_utilStringBuffer_.length() > 0) {
305                 return m_utilStringBuffer_.toString();
306             }
307         }
308         return null;
309     }
310 
311     /**
312     * Retrieves the extended name
313     */
getExtendedName(int ch)314     public String getExtendedName(int ch)
315     {
316         String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
317         if (result == null) {
318             // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
319             result = getExtendedOr10Name(ch);
320         }
321         return result;
322     }
323 
324     /**
325      * Gets the group index for the codepoint, or the group before it.
326      * @param codepoint The codepoint index.
327      * @return group index containing codepoint or the group before it.
328      */
getGroup(int codepoint)329     public int getGroup(int codepoint)
330     {
331         int endGroup = m_groupcount_;
332         int msb      = getCodepointMSB(codepoint);
333         int result   = 0;
334         // binary search for the group of names that contains the one for
335         // code
336         // find the group that contains codepoint, or the highest before it
337         while (result < endGroup - 1) {
338             int gindex = (result + endGroup) >> 1;
339             if (msb < getGroupMSB(gindex)) {
340                 endGroup = gindex;
341             }
342             else {
343                 result = gindex;
344             }
345         }
346         return result;
347     }
348 
349     /**
350      * Gets the extended and 1.0 name when the most current unicode names
351      * fail
352      * @param ch codepoint
353      * @return name of codepoint extended or 1.0
354      */
getExtendedOr10Name(int ch)355     public String getExtendedOr10Name(int ch)
356     {
357         String result = null;
358         // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
359         if (result == null) {
360             int type = getType(ch);
361             // Return unknown if the table of names above is not up to
362             // date.
363             if (type >= TYPE_NAMES_.length) {
364                 result = UNKNOWN_TYPE_NAME_;
365             }
366             else {
367                 result = TYPE_NAMES_[type];
368             }
369             synchronized (m_utilStringBuffer_) {
370                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
371                 m_utilStringBuffer_.append('<');
372                 m_utilStringBuffer_.append(result);
373                 m_utilStringBuffer_.append('-');
374                 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
375                 int zeros = 4 - chStr.length();
376                 while (zeros > 0) {
377                     m_utilStringBuffer_.append('0');
378                     zeros --;
379                 }
380                 m_utilStringBuffer_.append(chStr);
381                 m_utilStringBuffer_.append('>');
382                 result = m_utilStringBuffer_.toString();
383             }
384         }
385         return result;
386     }
387 
388     /**
389      * Gets the MSB from the group index
390      * @param gindex group index
391      * @return the MSB of the group if gindex is valid, -1 otherwise
392      */
getGroupMSB(int gindex)393     public int getGroupMSB(int gindex)
394     {
395         if (gindex >= m_groupcount_) {
396             return -1;
397         }
398         return m_groupinfo_[gindex * m_groupsize_];
399     }
400 
401     /**
402      * Gets the MSB of the codepoint
403      * @param codepoint The codepoint value.
404      * @return the MSB of the codepoint
405      */
getCodepointMSB(int codepoint)406     public static int getCodepointMSB(int codepoint)
407     {
408         return codepoint >> GROUP_SHIFT_;
409     }
410 
411     /**
412      * Gets the maximum codepoint + 1 of the group
413      * @param msb most significant byte of the group
414      * @return limit codepoint of the group
415      */
getGroupLimit(int msb)416     public static int getGroupLimit(int msb)
417     {
418         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
419     }
420 
421     /**
422      * Gets the minimum codepoint of the group
423      * @param msb most significant byte of the group
424      * @return minimum codepoint of the group
425      */
getGroupMin(int msb)426     public static int getGroupMin(int msb)
427     {
428         return msb << GROUP_SHIFT_;
429     }
430 
431     /**
432      * Gets the offset to a group
433      * @param codepoint The codepoint value.
434      * @return offset to a group
435      */
getGroupOffset(int codepoint)436     public static int getGroupOffset(int codepoint)
437     {
438         return codepoint & GROUP_MASK_;
439     }
440 
441     /**
442      * Gets the minimum codepoint of a group
443      * @param codepoint The codepoint value.
444      * @return minimum codepoint in the group which codepoint belongs to
445      */
446     ///CLOVER:OFF
getGroupMinFromCodepoint(int codepoint)447     public static int getGroupMinFromCodepoint(int codepoint)
448     {
449         return codepoint & ~GROUP_MASK_;
450     }
451     ///CLOVER:ON
452 
453     /**
454      * Get the Algorithm range length
455      * @return Algorithm range length
456      */
getAlgorithmLength()457     public int getAlgorithmLength()
458     {
459         return m_algorithm_.length;
460     }
461 
462     /**
463      * Gets the start of the range
464      * @param index algorithm index
465      * @return algorithm range start
466      */
getAlgorithmStart(int index)467     public int getAlgorithmStart(int index)
468     {
469         return m_algorithm_[index].m_rangestart_;
470     }
471 
472     /**
473      * Gets the end of the range
474      * @param index algorithm index
475      * @return algorithm range end
476      */
getAlgorithmEnd(int index)477     public int getAlgorithmEnd(int index)
478     {
479         return m_algorithm_[index].m_rangeend_;
480     }
481 
482     /**
483      * Gets the Algorithmic name of the codepoint
484      * @param index algorithmic range index
485      * @param codepoint The codepoint value.
486      * @return algorithmic name of codepoint
487      */
getAlgorithmName(int index, int codepoint)488     public String getAlgorithmName(int index, int codepoint)
489     {
490         String result = null;
491         synchronized (m_utilStringBuffer_) {
492             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
493             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
494             result = m_utilStringBuffer_.toString();
495         }
496         return result;
497     }
498 
499     /**
500     * Gets the group name of the character
501     * @param ch character to get the group name
502     * @param choice name choice selector to choose a unicode 1.0 or newer name
503     */
getGroupName(int ch, int choice)504     public synchronized String getGroupName(int ch, int choice)
505     {
506         // gets the msb
507         int msb   = getCodepointMSB(ch);
508         int group = getGroup(ch);
509 
510         // return this if it is an exact match
511         if (msb == m_groupinfo_[group * m_groupsize_]) {
512             int index = getGroupLengths(group, m_groupoffsets_,
513                                         m_grouplengths_);
514             int offset = ch & GROUP_MASK_;
515             return getGroupName(index + m_groupoffsets_[offset],
516                                 m_grouplengths_[offset], choice);
517         }
518 
519         return null;
520     }
521 
522     // these are transliterator use methods ---------------------------------
523 
524     /**
525      * Gets the maximum length of any codepoint name.
526      * Equivalent to uprv_getMaxCharNameLength.
527      * @return the maximum length of any codepoint name
528      */
getMaxCharNameLength()529     public int getMaxCharNameLength()
530     {
531         if (initNameSetsLengths()) {
532             return m_maxNameLength_;
533         }
534         else {
535             return 0;
536         }
537     }
538 
539     /**
540      * Gets the maximum length of any iso comments.
541      * Equivalent to uprv_getMaxISOCommentLength.
542      * @return the maximum length of any codepoint name
543      */
544     ///CLOVER:OFF
getMaxISOCommentLength()545     public int getMaxISOCommentLength()
546     {
547         if (initNameSetsLengths()) {
548             return m_maxISOCommentLength_;
549         }
550         else {
551             return 0;
552         }
553     }
554     ///CLOVER:ON
555 
556     /**
557      * Fills set with characters that are used in Unicode character names.
558      * Equivalent to uprv_getCharNameCharacters.
559      * @param set USet to receive characters. Existing contents are deleted.
560      */
getCharNameCharacters(UnicodeSet set)561     public void getCharNameCharacters(UnicodeSet set)
562     {
563         convert(m_nameSet_, set);
564     }
565 
566     /**
567      * Fills set with characters that are used in Unicode character names.
568      * Equivalent to uprv_getISOCommentCharacters.
569      * @param set USet to receive characters. Existing contents are deleted.
570      */
571     ///CLOVER:OFF
getISOCommentCharacters(UnicodeSet set)572     public void getISOCommentCharacters(UnicodeSet set)
573     {
574         convert(m_ISOCommentSet_, set);
575     }
576     ///CLOVER:ON
577 
578     // package private inner class --------------------------------------
579 
580     /**
581     * Algorithmic name class
582     */
583     static final class AlgorithmName
584     {
585         // package private data members ----------------------------------
586 
/**
 * Algorithm type constants. TYPE_0_ names consist of the prefix followed
 * by hex digits; TYPE_1_ names consist of the prefix followed by
 * factorized elements.
 */
static final int TYPE_0_ = 0;
static final int TYPE_1_ = 1;

// package private constructors ----------------------------------

/**
 * Creates an empty instance; the data is supplied afterwards through
 * the set methods.
 */
AlgorithmName()
{
}
601 
602         // package private methods ---------------------------------------
603 
604         /**
605         * Sets the information for accessing the algorithmic names
606         * @param rangestart starting code point that lies within this name group
607         * @param rangeend end code point that lies within this name group
608         * @param type algorithm type. There's 2 kinds of algorithmic type. First
609         *        which uses code point as part of its name and the other uses
610         *        variant postfix strings
611         * @param variant algorithmic variant
612         * @return true if values are valid
613         */
setInfo(int rangestart, int rangeend, byte type, byte variant)614         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
615         {
616             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
617                 && rangeend <= UCharacter.MAX_VALUE &&
618                 (type == TYPE_0_ || type == TYPE_1_)) {
619                 m_rangestart_ = rangestart;
620                 m_rangeend_ = rangeend;
621                 m_type_ = type;
622                 m_variant_ = variant;
623                 return true;
624             }
625             return false;
626         }
627 
628         /**
629         * Sets the factor data
630         * @param factor Array of factor
631         * @return true if factors are valid
632         */
setFactor(char factor[])633         boolean setFactor(char factor[])
634         {
635             if (factor.length == m_variant_) {
636                 m_factor_ = factor;
637                 return true;
638             }
639             return false;
640         }
641 
642         /**
643         * Sets the name prefix
644         * @param prefix
645         * @return true if prefix is set
646         */
setPrefix(String prefix)647         boolean setPrefix(String prefix)
648         {
649             if (prefix != null && prefix.length() > 0) {
650                 m_prefix_ = prefix;
651                 return true;
652             }
653             return false;
654         }
655 
656         /**
657         * Sets the variant factorized name data
658         * @param string variant factorized name data
659         * @return true if values are set
660         */
setFactorString(byte string[])661         boolean setFactorString(byte string[])
662         {
663             // factor and variant string can be empty for things like
664             // hanggul code points
665             m_factorstring_ = string;
666             return true;
667         }
668 
669         /**
670         * Checks if code point lies in Algorithm object at index
671         * @param ch code point
672         */
contains(int ch)673         boolean contains(int ch)
674         {
675             return m_rangestart_ <= ch && ch <= m_rangeend_;
676         }
677 
678         /**
679         * Appends algorithm name of code point into StringBuffer.
680         * Note this method does not check for validity of code point in Algorithm,
681         * result is undefined if code point does not belong in Algorithm.
682         * @param ch code point
683         * @param str StringBuffer to append to
684         */
appendName(int ch, StringBuffer str)685         void appendName(int ch, StringBuffer str)
686         {
687             str.append(m_prefix_);
688             switch (m_type_)
689             {
690                 case TYPE_0_:
691                     // prefix followed by hex digits indicating variants
692                 str.append(Utility.hex(ch,m_variant_));
693                     break;
694                 case TYPE_1_:
695                     // prefix followed by factorized-elements
696                     int offset = ch - m_rangestart_;
697                     int indexes[] = m_utilIntBuffer_;
698                     int factor;
699 
700                     // write elements according to the factors
701                     // the factorized elements are determined by modulo
702                     // arithmetic
703                     synchronized (m_utilIntBuffer_) {
704                         for (int i = m_variant_ - 1; i > 0; i --)
705                         {
706                             factor = m_factor_[i] & 0x00FF;
707                             indexes[i] = offset % factor;
708                             offset /= factor;
709                         }
710 
711                         // we don't need to calculate the last modulus because
712                         // start <= code <= end guarantees here that
713                         // code <= factors[0]
714                         indexes[0] = offset;
715 
716                         // joining up the factorized strings
717                         str.append(getFactorString(indexes, m_variant_));
718                     }
719                     break;
720             }
721         }
722 
723         /**
724         * Gets the character for the argument algorithmic name
725         * @return the algorithmic char or -1 otherwise.
726         */
getChar(String name)727         int getChar(String name)
728         {
729             int prefixlen = m_prefix_.length();
730             if (name.length() < prefixlen ||
731                 !m_prefix_.equals(name.substring(0, prefixlen))) {
732                 return -1;
733             }
734 
735             switch (m_type_)
736             {
737                 case TYPE_0_ :
738                 try
739                 {
740                     int result = Integer.parseInt(name.substring(prefixlen),
741                                                   16);
742                     // does it fit into the range?
743                     if (m_rangestart_ <= result && result <= m_rangeend_) {
744                         return result;
745                     }
746                 }
747                 catch (NumberFormatException e)
748                 {
749                     return -1;
750                 }
751                 break;
752                 case TYPE_1_ :
753                     // repetitative suffix name comparison done here
754                     // offset is the character code - start
755                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
756                     {
757                         int offset = ch - m_rangestart_;
758                         int indexes[] = m_utilIntBuffer_;
759                         int factor;
760 
761                         // write elements according to the factors
762                         // the factorized elements are determined by modulo
763                         // arithmetic
764                         synchronized (m_utilIntBuffer_) {
765                             for (int i = m_variant_ - 1; i > 0; i --)
766                             {
767                                 factor = m_factor_[i] & 0x00FF;
768                                 indexes[i] = offset % factor;
769                                 offset /= factor;
770                             }
771 
772                             // we don't need to calculate the last modulus
773                             // because start <= code <= end guarantees here that
774                             // code <= factors[0]
775                             indexes[0] = offset;
776 
777                             // joining up the factorized strings
778                             if (compareFactorString(indexes, m_variant_, name,
779                                                     prefixlen)) {
780                                 return ch;
781                             }
782                         }
783                     }
784             }
785 
786             return -1;
787         }
788 
789         /**
790          * Adds all chars in the set of algorithmic names into the set.
791          * Equivalent to part of calcAlgNameSetsLengths.
792          * @param set int set to add the chars of the algorithm names into
793          * @param maxlength maximum length to compare to
794          * @return the length that is either maxlength of the length of this
795          *         algorithm name if it is longer than maxlength
796          */
add(int set[], int maxlength)797         int add(int set[], int maxlength)
798         {
799             // prefix length
800             int length = UCharacterName.add(set, m_prefix_);
801             switch (m_type_) {
802                 case TYPE_0_ : {
803                     // name = prefix + (range->variant times) hex-digits
804                     // prefix
805                     length += m_variant_;
806                     /* synwee to check
807                      * addString(set, (const char *)(range + 1))
808                                        + range->variant;*/
809                     break;
810                 }
811                 case TYPE_1_ : {
812                     // name = prefix factorized-elements
813                     // get the set and maximum factor suffix length for each
814                     // factor
815                     for (int i = m_variant_ - 1; i > 0; i --)
816                     {
817                         int maxfactorlength = 0;
818                         int count = 0;
819                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
820                             synchronized (m_utilStringBuffer_) {
821                                 m_utilStringBuffer_.delete(0,
822                                                 m_utilStringBuffer_.length());
823                                 count
824                                   = UCharacterUtility.getNullTermByteSubString(
825                                                 m_utilStringBuffer_,
826                                                 m_factorstring_, count);
827                                 UCharacterName.add(set, m_utilStringBuffer_);
828                                 if (m_utilStringBuffer_.length()
829                                                             > maxfactorlength)
830                                 {
831                                     maxfactorlength
832                                                 = m_utilStringBuffer_.length();
833                                 }
834                             }
835                         }
836                         length += maxfactorlength;
837                     }
838                 }
839             }
840             if (length > maxlength) {
841                 return length;
842             }
843             return maxlength;
844         }
845 
846         // private data members ------------------------------------------
847 
848         /**
849         * Algorithmic data information
850         */
851         private int m_rangestart_;
852         private int m_rangeend_;
853         private byte m_type_;
854         private byte m_variant_;
855         private char m_factor_[];
856         private String m_prefix_;
857         private byte m_factorstring_[];
858         /**
859          * Utility StringBuffer
860          */
861         private StringBuffer m_utilStringBuffer_ = new StringBuffer();
862         /**
863          * Utility int buffer
864          */
865         private int m_utilIntBuffer_[] = new int[256];
866 
867         // private methods -----------------------------------------------
868 
869         /**
870         * Gets the indexth string in each of the argument factor block
871         * @param index array with each index corresponding to each factor block
872         * @param length length of the array index
873         * @return the combined string of the array of indexth factor string in
874         *         factor block
875         */
getFactorString(int index[], int length)876         private String getFactorString(int index[], int length)
877         {
878             int size = m_factor_.length;
879             if (index == null || length != size) {
880                 return null;
881             }
882 
883             synchronized (m_utilStringBuffer_) {
884                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
885                 int count = 0;
886                 int factor;
887                 size --;
888                 for (int i = 0; i <= size; i ++) {
889                     factor = m_factor_[i];
890                     count = UCharacterUtility.skipNullTermByteSubString(
891                                              m_factorstring_, count, index[i]);
892                     count = UCharacterUtility.getNullTermByteSubString(
893                                           m_utilStringBuffer_, m_factorstring_,
894                                           count);
895                     if (i != size) {
896                         count = UCharacterUtility.skipNullTermByteSubString(
897                                                        m_factorstring_, count,
898                                                        factor - index[i] - 1);
899                     }
900                 }
901                 return m_utilStringBuffer_.toString();
902             }
903         }
904 
905         /**
906         * Compares the indexth string in each of the argument factor block with
907         * the argument string
908         * @param index array with each index corresponding to each factor block
909         * @param length index array length
910         * @param str string to compare with
911         * @param offset of str to start comparison
912         * @return true if string matches
913         */
compareFactorString(int index[], int length, String str, int offset)914         private boolean compareFactorString(int index[], int length, String str,
915                                             int offset)
916         {
917             int size = m_factor_.length;
918             if (index == null || length != size)
919                 return false;
920 
921             int count = 0;
922             int strcount = offset;
923             int factor;
924             size --;
925             for (int i = 0; i <= size; i ++)
926             {
927                 factor = m_factor_[i];
928                 count = UCharacterUtility.skipNullTermByteSubString(
929                                           m_factorstring_, count, index[i]);
930                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
931                                           m_factorstring_, strcount, count);
932                 if (strcount < 0) {
933                     return false;
934                 }
935 
936                 if (i != size) {
937                     count = UCharacterUtility.skipNullTermByteSubString(
938                                   m_factorstring_, count, factor - index[i]);
939                 }
940             }
941             if (strcount != str.length()) {
942                 return false;
943             }
944             return true;
945         }
946     }
947 
948     // package private data members --------------------------------------
949 
950     /**
951      * Size of each groups
952      */
953     int m_groupsize_ = 0;
954 
955     // package private methods --------------------------------------------
956 
957     /**
958     * Sets the token data
959     * @param token array of tokens
960     * @param tokenstring array of string values of the tokens
961     * @return false if there is a data error
962     */
setToken(char token[], byte tokenstring[])963     boolean setToken(char token[], byte tokenstring[])
964     {
965         if (token != null && tokenstring != null && token.length > 0 &&
966             tokenstring.length > 0) {
967             m_tokentable_ = token;
968             m_tokenstring_ = tokenstring;
969             return true;
970         }
971         return false;
972     }
973 
974     /**
975     * Set the algorithm name information array
976     * @param alg Algorithm information array
977     * @return true if the group string offset has been set correctly
978     */
setAlgorithm(AlgorithmName alg[])979     boolean setAlgorithm(AlgorithmName alg[])
980     {
981         if (alg != null && alg.length != 0) {
982             m_algorithm_ = alg;
983             return true;
984         }
985         return false;
986     }
987 
988     /**
989     * Sets the number of group and size of each group in number of char
990     * @param count number of groups
991     * @param size size of group in char
992     * @return true if group size is set correctly
993     */
setGroupCountSize(int count, int size)994     boolean setGroupCountSize(int count, int size)
995     {
996         if (count <= 0 || size <= 0) {
997             return false;
998         }
999         m_groupcount_ = count;
1000         m_groupsize_ = size;
1001         return true;
1002     }
1003 
1004     /**
1005     * Sets the group name data
1006     * @param group index information array
1007     * @param groupstring name information array
1008     * @return false if there is a data error
1009     */
setGroup(char group[], byte groupstring[])1010     boolean setGroup(char group[], byte groupstring[])
1011     {
1012         if (group != null && groupstring != null && group.length > 0 &&
1013             groupstring.length > 0) {
1014             m_groupinfo_ = group;
1015             m_groupstring_ = groupstring;
1016             return true;
1017         }
1018         return false;
1019     }
1020 
1021     // private data members ----------------------------------------------
1022 
1023     /**
1024     * Data used in unames.icu
1025     */
1026     private char m_tokentable_[];
1027     private byte m_tokenstring_[];
1028     private char m_groupinfo_[];
1029     private byte m_groupstring_[];
1030     private AlgorithmName m_algorithm_[];
1031 
1032     /**
1033     * Group use.  Note - access must be synchronized.
1034     */
1035     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1036     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1037 
1038     /**
1039     * Default name of the name datafile
1040     */
1041     private static final String FILE_NAME_ = "unames.icu";
1042     /**
1043     * Shift count to retrieve group information
1044     */
1045     private static final int GROUP_SHIFT_ = 5;
1046     /**
1047     * Mask to retrieve the offset for a particular character within a group
1048     */
1049     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1050 
1051     /**
1052     * Position of offsethigh in group information array
1053     */
1054     private static final int OFFSET_HIGH_OFFSET_ = 1;
1055 
1056     /**
1057     * Position of offsetlow in group information array
1058     */
1059     private static final int OFFSET_LOW_OFFSET_ = 2;
1060     /**
1061     * Double nibble indicator, any nibble > this number has to be combined
1062     * with its following nibble
1063     */
1064     private static final int SINGLE_NIBBLE_MAX_ = 11;
1065 
1066     /*
1067      * Maximum length of character names (regular & 1.0).
1068      */
1069     //private static int MAX_NAME_LENGTH_ = 0;
1070     /*
1071      * Maximum length of ISO comments.
1072      */
1073     //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1074 
1075     /**
1076      * Set of chars used in character names (regular & 1.0).
1077      * Chars are platform-dependent (can be EBCDIC).
1078      */
1079     private int m_nameSet_[] = new int[8];
1080     /**
1081      * Set of chars used in ISO comments. (regular & 1.0).
1082      * Chars are platform-dependent (can be EBCDIC).
1083      */
1084     private int m_ISOCommentSet_[] = new int[8];
1085     /**
1086      * Utility StringBuffer
1087      */
1088     private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1089     /**
1090      * Utility int buffer
1091      */
1092     private int m_utilIntBuffer_[] = new int[2];
1093     /**
1094      * Maximum ISO comment length
1095      */
1096     private int m_maxISOCommentLength_;
1097     /**
1098      * Maximum name length
1099      */
1100     private int m_maxNameLength_;
1101     /**
1102      * Type names used for extended names
1103      */
1104     private static final String TYPE_NAMES_[] = {"unassigned",
1105                                                  "uppercase letter",
1106                                                  "lowercase letter",
1107                                                  "titlecase letter",
1108                                                  "modifier letter",
1109                                                  "other letter",
1110                                                  "non spacing mark",
1111                                                  "enclosing mark",
1112                                                  "combining spacing mark",
1113                                                  "decimal digit number",
1114                                                  "letter number",
1115                                                  "other number",
1116                                                  "space separator",
1117                                                  "line separator",
1118                                                  "paragraph separator",
1119                                                  "control",
1120                                                  "format",
1121                                                  "private use area",
1122                                                  "surrogate",
1123                                                  "dash punctuation",
1124                                                  "start punctuation",
1125                                                  "end punctuation",
1126                                                  "connector punctuation",
1127                                                  "other punctuation",
1128                                                  "math symbol",
1129                                                  "currency symbol",
1130                                                  "modifier symbol",
1131                                                  "other symbol",
1132                                                  "initial punctuation",
1133                                                  "final punctuation",
1134                                                  "noncharacter",
1135                                                  "lead surrogate",
1136                                                  "trail surrogate"};
1137     /**
1138      * Unknown type name
1139      */
1140     private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1141     /**
1142      * Not a character type
1143      */
1144     private static final int NON_CHARACTER_
1145                                     = UCharacterCategory.CHAR_CATEGORY_COUNT;
1146     /**
1147     * Lead surrogate type
1148     */
1149     private static final int LEAD_SURROGATE_
1150                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1151     /**
1152     * Trail surrogate type
1153     */
1154     private static final int TRAIL_SURROGATE_
1155                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1156     /**
1157     * Extended category count
1158     */
1159     static final int EXTENDED_CATEGORY_
1160                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1161 
1162     // private constructor ------------------------------------------------
1163 
1164     /**
1165     * <p>Protected constructor for use in UCharacter.</p>
1166     * @exception IOException thrown when data reading fails
1167     */
UCharacterName()1168     private UCharacterName() throws IOException
1169     {
1170         ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
1171         UCharacterNameReader reader = new UCharacterNameReader(b);
1172         reader.read(this);
1173     }
1174 
1175     // private methods ---------------------------------------------------
1176 
1177     /**
1178     * Gets the algorithmic name for the argument character
1179     * @param ch character to determine name for
1180     * @param choice name choice
1181     * @return the algorithmic name or null if not found
1182     */
getAlgName(int ch, int choice)1183     private String getAlgName(int ch, int choice)
1184     {
1185         /* Only the normative character name can be algorithmic. */
1186         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
1187             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
1188         ) {
1189             // index in terms integer index
1190             synchronized (m_utilStringBuffer_) {
1191                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
1192 
1193                 for (int index = m_algorithm_.length - 1; index >= 0; index --)
1194                 {
1195                    if (m_algorithm_[index].contains(ch)) {
1196                       m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1197                       return m_utilStringBuffer_.toString();
1198                    }
1199                 }
1200             }
1201         }
1202         return null;
1203     }
1204 
1205     /**
1206     * Getting the character with the tokenized argument name
1207     * @param name of the character
1208     * @return character with the tokenized argument name or -1 if character
1209     *         is not found
1210     */
getGroupChar(String name, int choice)1211     private synchronized int getGroupChar(String name, int choice)
1212     {
1213         for (int i = 0; i < m_groupcount_; i ++) {
1214             // populating the data set of grouptable
1215 
1216             int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1217                                                   m_grouplengths_);
1218 
1219             // shift out to function
1220             int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1221                                       choice);
1222             if (result != -1) {
1223                 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1224                          | result;
1225             }
1226         }
1227         return -1;
1228     }
1229 
1230     /**
1231     * Compares and retrieve character if name is found within the argument
1232     * group
1233     * @param index index where the set of names reside in the group block
1234     * @param length list of lengths of the strings
1235     * @param name character name to search for
1236     * @param choice of either 1.0 or the most current unicode name
1237     * @return relative character in the group which matches name, otherwise if
1238     *         not found, -1 will be returned
1239     */
getGroupChar(int index, char length[], String name, int choice)1240     private int getGroupChar(int index, char length[], String name,
1241                              int choice)
1242     {
1243         byte b = 0;
1244         char token;
1245         int len;
1246         int namelen = name.length();
1247         int nindex;
1248         int count;
1249 
1250         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1251             nindex = 0;
1252             len = length[result];
1253 
1254             if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
1255                 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
1256             ) {
1257                 /*
1258                  * skip the modern name if it is not requested _and_
1259                  * if the semicolon byte value is a character, not a token number
1260                  */
1261                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
1262                 do {
1263                     int oldindex = index;
1264                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
1265                                                          index, len, (byte)';');
1266                     len -= (index - oldindex);
1267                 } while(--fieldIndex>0);
1268             }
1269 
1270             // number of tokens is > the length of the name
1271             // write each letter directly, and write a token word per token
1272             for (count = 0; count < len && nindex != -1 && nindex < namelen;
1273                 ) {
1274                 b = m_groupstring_[index + count];
1275                 count ++;
1276 
1277                 if (b >= m_tokentable_.length) {
1278                     if (name.charAt(nindex ++) != (b & 0xFF)) {
1279                         nindex = -1;
1280                     }
1281                 }
1282                 else {
1283                     token = m_tokentable_[b & 0xFF];
1284                     if (token == 0xFFFE) {
1285                         // this is a lead byte for a double-byte token
1286                         token = m_tokentable_[b << 8 |
1287                                    (m_groupstring_[index + count] & 0x00ff)];
1288                         count ++;
1289                     }
1290                     if (token == 0xFFFF) {
1291                         if (name.charAt(nindex ++) != (b & 0xFF)) {
1292                             nindex = -1;
1293                         }
1294                     }
1295                     else {
1296                         // compare token with name
1297                         nindex = UCharacterUtility.compareNullTermByteSubString(
1298                                         name, m_tokenstring_, nindex, token);
1299                     }
1300                 }
1301             }
1302 
1303             if (namelen == nindex &&
1304                 (count == len || m_groupstring_[index + count] == ';')) {
1305                 return result;
1306             }
1307 
1308             index += len;
1309         }
1310         return -1;
1311     }
1312 
1313     /**
1314     * Gets the character extended type
1315     * @param ch character to be tested
1316     * @return extended type it is associated with
1317     */
getType(int ch)1318     private static int getType(int ch)
1319     {
1320         if (UCharacterUtility.isNonCharacter(ch)) {
1321             // not a character we return a invalid category count
1322             return NON_CHARACTER_;
1323         }
1324         int result = UCharacter.getType(ch);
1325         if (result == UCharacterCategory.SURROGATE) {
1326             if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1327                 result = LEAD_SURROGATE_;
1328             }
1329             else {
1330                 result = TRAIL_SURROGATE_;
1331             }
1332         }
1333         return result;
1334     }
1335 
1336     /**
1337     * Getting the character with extended name of the form <....>.
1338     * @param name of the character to be found
1339     * @param choice name choice
1340     * @return character associated with the name, -1 if such character is not
1341     *                   found and -2 if we should continue with the search.
1342     */
getExtendedChar(String name, int choice)1343     private static int getExtendedChar(String name, int choice)
1344     {
1345         if (name.charAt(0) == '<') {
1346             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1347                 int endIndex = name.length() - 1;
1348                 if (name.charAt(endIndex) == '>') {
1349                     int startIndex = name.lastIndexOf('-');
1350                     if (startIndex >= 0) { // We've got a category.
1351                         startIndex ++;
1352                         int result = -1;
1353                         try {
1354                             result = Integer.parseInt(
1355                                         name.substring(startIndex, endIndex),
1356                                         16);
1357                         }
1358                         catch (NumberFormatException e) {
1359                             return -1;
1360                         }
1361                         // Now validate the category name. We could use a
1362                         // binary search, or a trie, if we really wanted to.
1363                         String type = name.substring(1, startIndex - 1);
1364                         int length = TYPE_NAMES_.length;
1365                         for (int i = 0; i < length; ++ i) {
1366                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1367                                 if (getType(result) == i) {
1368                                     return result;
1369                                 }
1370                                 break;
1371                             }
1372                         }
1373                     }
1374                 }
1375             }
1376             return -1;
1377         }
1378         return -2;
1379     }
1380 
1381     // sets of name characters, maximum name lengths -----------------------
1382 
1383     /**
1384      * Adds a codepoint into a set of ints.
1385      * Equivalent to SET_ADD.
1386      * @param set set to add to
1387      * @param ch 16 bit char to add
1388      */
add(int set[], char ch)1389     private static void add(int set[], char ch)
1390     {
1391         set[ch >>> 5] |= 1 << (ch & 0x1f);
1392     }
1393 
1394     /**
1395      * Checks if a codepoint is a part of a set of ints.
1396      * Equivalent to SET_CONTAINS.
1397      * @param set set to check in
1398      * @param ch 16 bit char to check
1399      * @return true if codepoint is part of the set, false otherwise
1400      */
contains(int set[], char ch)1401     private static boolean contains(int set[], char ch)
1402     {
1403         return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1404     }
1405 
1406     /**
1407      * Adds all characters of the argument str and gets the length
1408      * Equivalent to calcStringSetLength.
1409      * @param set set to add all chars of str to
1410      * @param str string to add
1411      */
add(int set[], String str)1412     private static int add(int set[], String str)
1413     {
1414         int result = str.length();
1415 
1416         for (int i = result - 1; i >= 0; i --) {
1417             add(set, str.charAt(i));
1418         }
1419         return result;
1420     }
1421 
1422     /**
1423      * Adds all characters of the argument str and gets the length
1424      * Equivalent to calcStringSetLength.
1425      * @param set set to add all chars of str to
1426      * @param str string to add
1427      */
add(int set[], StringBuffer str)1428     private static int add(int set[], StringBuffer str)
1429     {
1430         int result = str.length();
1431 
1432         for (int i = result - 1; i >= 0; i --) {
1433             add(set, str.charAt(i));
1434         }
1435         return result;
1436     }
1437 
1438     /**
1439      * Adds all algorithmic names into the name set.
1440      * Equivalent to part of calcAlgNameSetsLengths.
1441      * @param maxlength length to compare to
1442      * @return the maximum length of any possible algorithmic name if it is >
1443      *         maxlength, otherwise maxlength is returned.
1444      */
addAlgorithmName(int maxlength)1445     private int addAlgorithmName(int maxlength)
1446     {
1447         int result = 0;
1448         for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1449             result = m_algorithm_[i].add(m_nameSet_, maxlength);
1450             if (result > maxlength) {
1451                 maxlength = result;
1452             }
1453         }
1454         return maxlength;
1455     }
1456 
1457     /**
1458      * Adds all extended names into the name set.
1459      * Equivalent to part of calcExtNameSetsLengths.
1460      * @param maxlength length to compare to
1461      * @return the maxlength of any possible extended name.
1462      */
addExtendedName(int maxlength)1463     private int addExtendedName(int maxlength)
1464     {
1465         for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1466             // for each category, count the length of the category name
1467             // plus 9 =
1468             // 2 for <>
1469             // 1 for -
1470             // 6 for most hex digits per code point
1471             int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1472             if (length > maxlength) {
1473                 maxlength = length;
1474             }
1475         }
1476         return maxlength;
1477     }
1478 
1479     /**
1480      * Adds names of a group to the argument set.
1481      * Equivalent to calcNameSetLength.
1482      * @param offset of the group name string in byte count
1483      * @param length of the group name string
1484      * @param tokenlength array to store the length of each token
1485      * @param set to add to
1486      * @return the length of the name string and the length of the group
1487      *         string parsed
1488      */
addGroupName(int offset, int length, byte tokenlength[], int set[])1489     private int[] addGroupName(int offset, int length, byte tokenlength[],
1490                                int set[])
1491     {
1492         int resultnlength = 0;
1493         int resultplength = 0;
1494         while (resultplength < length) {
1495             char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1496             resultplength ++;
1497             if (b == ';') {
1498                 break;
1499             }
1500 
1501             if (b >= m_tokentable_.length) {
1502                 add(set, b); // implicit letter
1503                 resultnlength ++;
1504             }
1505             else {
1506                 char token = m_tokentable_[b & 0x00ff];
1507                 if (token == 0xFFFE) {
1508                     // this is a lead byte for a double-byte token
1509                     b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1510                                          & 0x00ff));
1511                     token = m_tokentable_[b];
1512                     resultplength ++;
1513                 }
1514                 if (token == 0xFFFF) {
1515                     add(set, b);
1516                     resultnlength ++;
1517                 }
1518                 else {
1519                     // count token word
1520                     // use cached token length
1521                     byte tlength = tokenlength[b];
1522                     if (tlength == 0) {
1523                         synchronized (m_utilStringBuffer_) {
1524                             m_utilStringBuffer_.delete(0,
1525                                                  m_utilStringBuffer_.length());
1526                             UCharacterUtility.getNullTermByteSubString(
1527                                            m_utilStringBuffer_, m_tokenstring_,
1528                                            token);
1529                             tlength = (byte)add(set, m_utilStringBuffer_);
1530                         }
1531                         tokenlength[b] = tlength;
1532                     }
1533                     resultnlength += tlength;
1534                 }
1535             }
1536         }
1537         m_utilIntBuffer_[0] = resultnlength;
1538         m_utilIntBuffer_[1] = resultplength;
1539         return m_utilIntBuffer_;
1540     }
1541 
1542     /**
1543      * Adds names of all group to the argument set.
1544      * Sets the data member m_max*Length_.
1545      * Method called only once.
1546      * Equivalent to calcGroupNameSetsLength.
1547      * @param maxlength length to compare to
1548      */
addGroupName(int maxlength)1549     private void addGroupName(int maxlength)
1550     {
1551         int maxisolength = 0;
1552         char offsets[] = new char[LINES_PER_GROUP_ + 2];
1553         char lengths[] = new char[LINES_PER_GROUP_ + 2];
1554         byte tokenlengths[] = new byte[m_tokentable_.length];
1555 
1556         // enumerate all groups
1557         // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1558         for (int i = 0; i < m_groupcount_ ; i ++) {
1559             int offset = getGroupLengths(i, offsets, lengths);
1560             // enumerate all lines in each group
1561             // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1562             //    linenumber --) {
1563             for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1564                 linenumber ++) {
1565                 int lineoffset = offset + offsets[linenumber];
1566                 int length = lengths[linenumber];
1567                 if (length == 0) {
1568                     continue;
1569                 }
1570 
1571                 // read regular name
1572                 int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1573                                             m_nameSet_);
1574                 if (parsed[0] > maxlength) {
1575                     // 0 for name length
1576                     maxlength = parsed[0];
1577                 }
1578                 lineoffset += parsed[1];
1579                 if (parsed[1] >= length) {
1580                     // 1 for parsed group string length
1581                     continue;
1582                 }
1583                 length -= parsed[1];
1584                 // read Unicode 1.0 name
1585                 parsed = addGroupName(lineoffset, length, tokenlengths,
1586                                       m_nameSet_);
1587                 if (parsed[0] > maxlength) {
1588                     // 0 for name length
1589                     maxlength = parsed[0];
1590                 }
1591                 lineoffset += parsed[1];
1592                 if (parsed[1] >= length) {
1593                     // 1 for parsed group string length
1594                     continue;
1595                 }
1596                 length -= parsed[1];
1597                 // read ISO comment
1598                 parsed = addGroupName(lineoffset, length, tokenlengths,
1599                                       m_ISOCommentSet_);
1600                 if (parsed[1] > maxisolength) {
1601                     maxisolength = length;
1602                 }
1603             }
1604         }
1605 
1606         // set gMax... - name length last for threading
1607         m_maxISOCommentLength_ = maxisolength;
1608         m_maxNameLength_ = maxlength;
1609     }
1610 
1611     /**
1612      * Sets up the name sets and the calculation of the maximum lengths.
1613      * Equivalent to calcNameSetsLengths.
1614      */
initNameSetsLengths()1615     private boolean initNameSetsLengths()
1616     {
1617         if (m_maxNameLength_ > 0) {
1618             return true;
1619         }
1620 
1621         String extra = "0123456789ABCDEF<>-";
1622         // set hex digits, used in various names, and <>-, used in extended
1623         // names
1624         for (int i = extra.length() - 1; i >= 0; i --) {
1625             add(m_nameSet_, extra.charAt(i));
1626         }
1627 
1628         // set sets and lengths from algorithmic names
1629         m_maxNameLength_ = addAlgorithmName(0);
1630         // set sets and lengths from extended names
1631         m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1632         // set sets and lengths from group names, set global maximum values
1633         addGroupName(m_maxNameLength_);
1634         return true;
1635     }
1636 
1637     /**
1638      * Converts the char set cset into a Unicode set uset.
1639      * Equivalent to charSetToUSet.
1640      * @param set Set of 256 bit flags corresponding to a set of chars.
1641      * @param uset USet to receive characters. Existing contents are deleted.
1642      */
convert(int set[], UnicodeSet uset)1643     private void convert(int set[], UnicodeSet uset)
1644     {
1645         uset.clear();
1646         if (!initNameSetsLengths()) {
1647             return;
1648         }
1649 
1650         // build a char string with all chars that are used in character names
1651         for (char c = 255; c > 0; c --) {
1652             if (contains(set, c)) {
1653                 uset.add(c);
1654             }
1655         }
1656     }
1657 }
1658