• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 import java.util.Locale;
15 import java.util.MissingResourceException;
16 
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.lang.UCharacterCategory;
19 import com.ibm.icu.text.UTF16;
20 import com.ibm.icu.text.UnicodeSet;
21 
22 /**
23 * Internal class to manage character names.
* Since the data for names is stored
* in an array of char, indexes used in this class refer by default to
* a 2-byte count, unless otherwise stated. In cases where the index refers
* to a byte count, the index is halved and, depending on whether the index is
* even or odd, the MSB or LSB of the char at the halved index is
* returned. For indexes into an array of int, the index is multiplied by 2, and
* the char at the multiplied index together with its following char is returned
* as an int.
* <a href="../lang/UCharacter.html">UCharacter</a> acts as a public facade for this class.
33 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
34 * @author Syn Wee Quek
35 * @since nov0700
36 */
37 
38 public final class UCharacterName
39 {
40     // public data members ----------------------------------------------
41 
42     /*
43      * public singleton instance
44      */
45     public static final UCharacterName INSTANCE;
46 
47     static {
48         try {
49             INSTANCE = new UCharacterName();
50         } catch (IOException e) {
51             ///CLOVER:OFF
52             throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","","");
53             ///CLOVER:ON
54         }
55     }
56 
57     /**
58     * Number of lines per group
59     * 1 << GROUP_SHIFT_
60     */
61     public static final int LINES_PER_GROUP_ = 1 << 5;
62     /**
63      * Maximum number of groups
64      */
65     public int m_groupcount_ = 0;
66 
67     // public methods ---------------------------------------------------
68 
69     /**
70     * Retrieve the name of a Unicode code point.
71     * Depending on <code>choice</code>, the character name written into the
72     * buffer is the "modern" name or the name that was defined in Unicode
73     * version 1.0.
74     * The name contains only "invariant" characters
75     * like A-Z, 0-9, space, and '-'.
76     *
77     * @param ch the code point for which to get the name.
78     * @param choice Selector for which name to get.
79     * @return if code point is above 0x1fff, null is returned
80     */
getName(int ch, int choice)81     public String getName(int ch, int choice)
82     {
83         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
84             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
85             return null;
86         }
87 
88         String result = null;
89 
90         result = getAlgName(ch, choice);
91 
92         // getting normal character name
93         if (result == null || result.length() == 0) {
94             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
95                 result = getExtendedName(ch);
96             } else {
97                 result = getGroupName(ch, choice);
98             }
99         }
100 
101         return result;
102     }
103 
104     /**
105     * Find a character by its name and return its code point value
106     * @param choice selector to indicate if argument name is a Unicode 1.0
107     *        or the most current version
108     * @param name the name to search for
109     * @return code point
110     */
getCharFromName(int choice, String name)111     public int getCharFromName(int choice, String name)
112     {
113         // checks for illegal arguments
114         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
115             name == null || name.length() == 0) {
116             return -1;
117         }
118 
119         // try extended names first
120         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
121         if (result >= -1) {
122             return result;
123         }
124 
125         String upperCaseName = name.toUpperCase(Locale.ENGLISH);
126         // try algorithmic names first, if fails then try group names
127         // int result = getAlgorithmChar(choice, uppercasename);
128 
129         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
130             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
131         ) {
132             int count = 0;
133             if (m_algorithm_ != null) {
134                 count = m_algorithm_.length;
135             }
136             for (count --; count >= 0; count --) {
137                 result = m_algorithm_[count].getChar(upperCaseName);
138                 if (result >= 0) {
139                     return result;
140                 }
141             }
142         }
143 
144         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
145             result = getGroupChar(upperCaseName,
146                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
147             if (result == -1) {
148                 result = getGroupChar(upperCaseName,
149                                       UCharacterNameChoice.CHAR_NAME_ALIAS);
150             }
151         }
152         else {
153             result = getGroupChar(upperCaseName, choice);
154         }
155         return result;
156     }
157 
158     // these are all UCharacterNameIterator use methods -------------------
159 
160     /**
161     * Reads a block of compressed lengths of 32 strings and expands them into
162     * offsets and lengths for each string. Lengths are stored with a
163     * variable-width encoding in consecutive nibbles:
164     * If a nibble<0xc, then it is the length itself (0 = empty string).
165     * If a nibble>=0xc, then it forms a length value with the following
166     * nibble.
167     * The offsets and lengths arrays must be at least 33 (one more) long
168     * because there is no check here at the end if the last nibble is still
169     * used.
170     * @param index of group string object in array
171     * @param offsets array to store the value of the string offsets
172     * @param lengths array to store the value of the string length
173     * @return next index of the data string immediately after the lengths
174     *         in terms of byte address
175     */
getGroupLengths(int index, char offsets[], char lengths[])176     public int getGroupLengths(int index, char offsets[], char lengths[])
177     {
178         char length = 0xffff;
179         byte b = 0,
180             n = 0;
181         int shift;
182         index = index * m_groupsize_; // byte count offsets of group strings
183         int stringoffset = UCharacterUtility.toInt(
184                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
185                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
186 
187         offsets[0] = 0;
188 
189         // all 32 lengths must be read to get the offset of the first group
190         // string
191         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
192             b = m_groupstring_[stringoffset];
193             shift = 4;
194 
195             while (shift >= 0) {
196                 // getting nibble
197                 n = (byte)((b >> shift) & 0x0F);
198                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
199                     length = (char)((n - 12) << 4);
200                 }
201                 else {
202                     if (length != 0xffff) {
203                        lengths[i] = (char)((length | n) + 12);
204                     }
205                     else {
206                        lengths[i] = (char)n;
207                     }
208 
209                     if (i < LINES_PER_GROUP_) {
210                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
211                     }
212 
213                     length = 0xffff;
214                     i ++;
215                 }
216 
217                 shift -= 4;
218             }
219         }
220         return stringoffset;
221     }
222 
223     /**
224     * Gets the name of the argument group index.
225     * UnicodeData.txt uses ';' as a field separator, so no field can contain
226     * ';' as part of its contents. In unames.icu, it is marked as
227     * token[';'] == -1 only if the semicolon is used in the data file - which
228     * is iff we have Unicode 1.0 names or ISO comments or aliases.
229     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
230     * although we know that it will never be part of a name.
231     * Equivalent to ICU4C's expandName.
232     * @param index of the group name string in byte count
233     * @param length of the group name string
234     * @param choice of Unicode 1.0 name or the most current name
235     * @return name of the group
236     */
getGroupName(int index, int length, int choice)237     public String getGroupName(int index, int length, int choice)
238     {
239         if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
240             choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
241         ) {
242             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
243                 /*
244                  * skip the modern name if it is not requested _and_
245                  * if the semicolon byte value is a character, not a token number
246                  */
247                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
248                 do {
249                     int oldindex = index;
250                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
251                                                        index, length, (byte)';');
252                     length -= (index - oldindex);
253                 } while(--fieldIndex>0);
254             }
255             else {
256                 // the semicolon byte is a token number, therefore only modern
257                 // names are stored in unames.dat and there is no such
258                 // requested alternate name here
259                 length = 0;
260             }
261         }
262 
263         synchronized (m_utilStringBuffer_) {
264             m_utilStringBuffer_.setLength(0);
265             byte b;
266             char token;
267             for (int i = 0; i < length;) {
268                 b = m_groupstring_[index + i];
269                 i ++;
270 
271                 if (b >= m_tokentable_.length) {
272                     if (b == ';') {
273                         break;
274                     }
275                     m_utilStringBuffer_.append(b); // implicit letter
276                 }
277                 else {
278                     token = m_tokentable_[b & 0x00ff];
279                     if (token == 0xFFFE) {
280                         // this is a lead byte for a double-byte token
281                         token = m_tokentable_[b << 8 |
282                                           (m_groupstring_[index + i] & 0x00ff)];
283                         i ++;
284                     }
285                     if (token == 0xFFFF) {
286                         if (b == ';') {
287                             // skip the semicolon if we are seeking extended
288                             // names and there was no 2.0 name but there
289                             // is a 1.0 name.
290                             if (m_utilStringBuffer_.length() == 0 && choice ==
291                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
292                                 continue;
293                             }
294                             break;
295                         }
296                         // explicit letter
297                         m_utilStringBuffer_.append((char)(b & 0x00ff));
298                     }
299                     else { // write token word
300                         UCharacterUtility.getNullTermByteSubString(
301                                 m_utilStringBuffer_, m_tokenstring_, token);
302                     }
303                 }
304             }
305 
306             if (m_utilStringBuffer_.length() > 0) {
307                 return m_utilStringBuffer_.toString();
308             }
309         }
310         return null;
311     }
312 
313     /**
314     * Retrieves the extended name
315     */
getExtendedName(int ch)316     public String getExtendedName(int ch)
317     {
318         String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
319         if (result == null) {
320             // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
321             result = getExtendedOr10Name(ch);
322         }
323         return result;
324     }
325 
326     /**
327      * Gets the group index for the codepoint, or the group before it.
328      * @param codepoint The codepoint index.
329      * @return group index containing codepoint or the group before it.
330      */
getGroup(int codepoint)331     public int getGroup(int codepoint)
332     {
333         int endGroup = m_groupcount_;
334         int msb      = getCodepointMSB(codepoint);
335         int result   = 0;
336         // binary search for the group of names that contains the one for
337         // code
338         // find the group that contains codepoint, or the highest before it
339         while (result < endGroup - 1) {
340             int gindex = (result + endGroup) >> 1;
341             if (msb < getGroupMSB(gindex)) {
342                 endGroup = gindex;
343             }
344             else {
345                 result = gindex;
346             }
347         }
348         return result;
349     }
350 
351     /**
352      * Gets the extended and 1.0 name when the most current unicode names
353      * fail
354      * @param ch codepoint
355      * @return name of codepoint extended or 1.0
356      */
getExtendedOr10Name(int ch)357     public String getExtendedOr10Name(int ch)
358     {
359         String result = null;
360         // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
361         if (result == null) {
362             int type = getType(ch);
363             // Return unknown if the table of names above is not up to
364             // date.
365             if (type >= TYPE_NAMES_.length) {
366                 result = UNKNOWN_TYPE_NAME_;
367             }
368             else {
369                 result = TYPE_NAMES_[type];
370             }
371             synchronized (m_utilStringBuffer_) {
372                 m_utilStringBuffer_.setLength(0);
373                 m_utilStringBuffer_.append('<');
374                 m_utilStringBuffer_.append(result);
375                 m_utilStringBuffer_.append('-');
376                 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
377                 int zeros = 4 - chStr.length();
378                 while (zeros > 0) {
379                     m_utilStringBuffer_.append('0');
380                     zeros --;
381                 }
382                 m_utilStringBuffer_.append(chStr);
383                 m_utilStringBuffer_.append('>');
384                 result = m_utilStringBuffer_.toString();
385             }
386         }
387         return result;
388     }
389 
390     /**
391      * Gets the MSB from the group index
392      * @param gindex group index
393      * @return the MSB of the group if gindex is valid, -1 otherwise
394      */
getGroupMSB(int gindex)395     public int getGroupMSB(int gindex)
396     {
397         if (gindex >= m_groupcount_) {
398             return -1;
399         }
400         return m_groupinfo_[gindex * m_groupsize_];
401     }
402 
403     /**
404      * Gets the MSB of the codepoint
405      * @param codepoint The codepoint value.
406      * @return the MSB of the codepoint
407      */
getCodepointMSB(int codepoint)408     public static int getCodepointMSB(int codepoint)
409     {
410         return codepoint >> GROUP_SHIFT_;
411     }
412 
413     /**
414      * Gets the maximum codepoint + 1 of the group
415      * @param msb most significant byte of the group
416      * @return limit codepoint of the group
417      */
getGroupLimit(int msb)418     public static int getGroupLimit(int msb)
419     {
420         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
421     }
422 
423     /**
424      * Gets the minimum codepoint of the group
425      * @param msb most significant byte of the group
426      * @return minimum codepoint of the group
427      */
getGroupMin(int msb)428     public static int getGroupMin(int msb)
429     {
430         return msb << GROUP_SHIFT_;
431     }
432 
433     /**
434      * Gets the offset to a group
435      * @param codepoint The codepoint value.
436      * @return offset to a group
437      */
getGroupOffset(int codepoint)438     public static int getGroupOffset(int codepoint)
439     {
440         return codepoint & GROUP_MASK_;
441     }
442 
443     /**
444      * Gets the minimum codepoint of a group
445      * @param codepoint The codepoint value.
446      * @return minimum codepoint in the group which codepoint belongs to
447      */
448     ///CLOVER:OFF
getGroupMinFromCodepoint(int codepoint)449     public static int getGroupMinFromCodepoint(int codepoint)
450     {
451         return codepoint & ~GROUP_MASK_;
452     }
453     ///CLOVER:ON
454 
455     /**
456      * Get the Algorithm range length
457      * @return Algorithm range length
458      */
getAlgorithmLength()459     public int getAlgorithmLength()
460     {
461         return m_algorithm_.length;
462     }
463 
464     /**
465      * Gets the start of the range
466      * @param index algorithm index
467      * @return algorithm range start
468      */
getAlgorithmStart(int index)469     public int getAlgorithmStart(int index)
470     {
471         return m_algorithm_[index].m_rangestart_;
472     }
473 
474     /**
475      * Gets the end of the range
476      * @param index algorithm index
477      * @return algorithm range end
478      */
getAlgorithmEnd(int index)479     public int getAlgorithmEnd(int index)
480     {
481         return m_algorithm_[index].m_rangeend_;
482     }
483 
484     /**
485      * Gets the Algorithmic name of the codepoint
486      * @param index algorithmic range index
487      * @param codepoint The codepoint value.
488      * @return algorithmic name of codepoint
489      */
getAlgorithmName(int index, int codepoint)490     public String getAlgorithmName(int index, int codepoint)
491     {
492         String result = null;
493         synchronized (m_utilStringBuffer_) {
494             m_utilStringBuffer_.setLength(0);
495             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
496             result = m_utilStringBuffer_.toString();
497         }
498         return result;
499     }
500 
501     /**
502     * Gets the group name of the character
503     * @param ch character to get the group name
504     * @param choice name choice selector to choose a unicode 1.0 or newer name
505     */
getGroupName(int ch, int choice)506     public synchronized String getGroupName(int ch, int choice)
507     {
508         // gets the msb
509         int msb   = getCodepointMSB(ch);
510         int group = getGroup(ch);
511 
512         // return this if it is an exact match
513         if (msb == m_groupinfo_[group * m_groupsize_]) {
514             int index = getGroupLengths(group, m_groupoffsets_,
515                                         m_grouplengths_);
516             int offset = ch & GROUP_MASK_;
517             return getGroupName(index + m_groupoffsets_[offset],
518                                 m_grouplengths_[offset], choice);
519         }
520 
521         return null;
522     }
523 
524     // these are transliterator use methods ---------------------------------
525 
526     /**
527      * Gets the maximum length of any codepoint name.
528      * Equivalent to uprv_getMaxCharNameLength.
529      * @return the maximum length of any codepoint name
530      */
getMaxCharNameLength()531     public int getMaxCharNameLength()
532     {
533         if (initNameSetsLengths()) {
534             return m_maxNameLength_;
535         }
536         else {
537             return 0;
538         }
539     }
540 
541     /**
542      * Gets the maximum length of any iso comments.
543      * Equivalent to uprv_getMaxISOCommentLength.
544      * @return the maximum length of any codepoint name
545      */
546     ///CLOVER:OFF
getMaxISOCommentLength()547     public int getMaxISOCommentLength()
548     {
549         if (initNameSetsLengths()) {
550             return m_maxISOCommentLength_;
551         }
552         else {
553             return 0;
554         }
555     }
556     ///CLOVER:ON
557 
558     /**
559      * Fills set with characters that are used in Unicode character names.
560      * Equivalent to uprv_getCharNameCharacters.
561      * @param set USet to receive characters. Existing contents are deleted.
562      */
getCharNameCharacters(UnicodeSet set)563     public void getCharNameCharacters(UnicodeSet set)
564     {
565         convert(m_nameSet_, set);
566     }
567 
568     /**
569      * Fills set with characters that are used in Unicode character names.
570      * Equivalent to uprv_getISOCommentCharacters.
571      * @param set USet to receive characters. Existing contents are deleted.
572      */
573     ///CLOVER:OFF
getISOCommentCharacters(UnicodeSet set)574     public void getISOCommentCharacters(UnicodeSet set)
575     {
576         convert(m_ISOCommentSet_, set);
577     }
578     ///CLOVER:ON
579 
580     // package private inner class --------------------------------------
581 
582     /**
583     * Algorithmic name class
584     */
585     static final class AlgorithmName
586     {
587         // package private data members ----------------------------------
588 
589         /**
590         * Constant type value of the different AlgorithmName
591         */
592         static final int TYPE_0_ = 0;
593         static final int TYPE_1_ = 1;
594 
595         // package private constructors ----------------------------------
596 
597         /**
598         * Constructor
599         */
AlgorithmName()600         AlgorithmName()
601         {
602         }
603 
604         // package private methods ---------------------------------------
605 
606         /**
607         * Sets the information for accessing the algorithmic names
608         * @param rangestart starting code point that lies within this name group
609         * @param rangeend end code point that lies within this name group
610         * @param type algorithm type. There's 2 kinds of algorithmic type. First
611         *        which uses code point as part of its name and the other uses
612         *        variant postfix strings
613         * @param variant algorithmic variant
614         * @return true if values are valid
615         */
setInfo(int rangestart, int rangeend, byte type, byte variant)616         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
617         {
618             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
619                 && rangeend <= UCharacter.MAX_VALUE &&
620                 (type == TYPE_0_ || type == TYPE_1_)) {
621                 m_rangestart_ = rangestart;
622                 m_rangeend_ = rangeend;
623                 m_type_ = type;
624                 m_variant_ = variant;
625                 return true;
626             }
627             return false;
628         }
629 
630         /**
631         * Sets the factor data
632         * @param factor Array of factor
633         * @return true if factors are valid
634         */
setFactor(char factor[])635         boolean setFactor(char factor[])
636         {
637             if (factor.length == m_variant_) {
638                 m_factor_ = factor;
639                 return true;
640             }
641             return false;
642         }
643 
644         /**
645         * Sets the name prefix
646         * @param prefix
647         * @return true if prefix is set
648         */
setPrefix(String prefix)649         boolean setPrefix(String prefix)
650         {
651             if (prefix != null && prefix.length() > 0) {
652                 m_prefix_ = prefix;
653                 return true;
654             }
655             return false;
656         }
657 
658         /**
659         * Sets the variant factorized name data
660         * @param string variant factorized name data
661         * @return true if values are set
662         */
setFactorString(byte string[])663         boolean setFactorString(byte string[])
664         {
665             // factor and variant string can be empty for things like
666             // hanggul code points
667             m_factorstring_ = string;
668             return true;
669         }
670 
671         /**
672         * Checks if code point lies in Algorithm object at index
673         * @param ch code point
674         */
contains(int ch)675         boolean contains(int ch)
676         {
677             return m_rangestart_ <= ch && ch <= m_rangeend_;
678         }
679 
680         /**
681         * Appends algorithm name of code point into StringBuffer.
682         * Note this method does not check for validity of code point in Algorithm,
683         * result is undefined if code point does not belong in Algorithm.
684         * @param ch code point
685         * @param str StringBuffer to append to
686         */
appendName(int ch, StringBuffer str)687         void appendName(int ch, StringBuffer str)
688         {
689             str.append(m_prefix_);
690             switch (m_type_)
691             {
692                 case TYPE_0_:
693                     // prefix followed by hex digits indicating variants
694                 str.append(Utility.hex(ch,m_variant_));
695                     break;
696                 case TYPE_1_:
697                     // prefix followed by factorized-elements
698                     int offset = ch - m_rangestart_;
699                     int indexes[] = m_utilIntBuffer_;
700                     int factor;
701 
702                     // write elements according to the factors
703                     // the factorized elements are determined by modulo
704                     // arithmetic
705                     synchronized (m_utilIntBuffer_) {
706                         for (int i = m_variant_ - 1; i > 0; i --)
707                         {
708                             factor = m_factor_[i] & 0x00FF;
709                             indexes[i] = offset % factor;
710                             offset /= factor;
711                         }
712 
713                         // we don't need to calculate the last modulus because
714                         // start <= code <= end guarantees here that
715                         // code <= factors[0]
716                         indexes[0] = offset;
717 
718                         // joining up the factorized strings
719                         str.append(getFactorString(indexes, m_variant_));
720                     }
721                     break;
722             }
723         }
724 
725         /**
726         * Gets the character for the argument algorithmic name
727         * @return the algorithmic char or -1 otherwise.
728         */
getChar(String name)729         int getChar(String name)
730         {
731             int prefixlen = m_prefix_.length();
732             if (name.length() < prefixlen ||
733                 !m_prefix_.equals(name.substring(0, prefixlen))) {
734                 return -1;
735             }
736 
737             switch (m_type_)
738             {
739                 case TYPE_0_ :
740                 try
741                 {
742                     int result = Integer.parseInt(name.substring(prefixlen),
743                                                   16);
744                     // does it fit into the range?
745                     if (m_rangestart_ <= result && result <= m_rangeend_) {
746                         return result;
747                     }
748                 }
749                 catch (NumberFormatException e)
750                 {
751                     return -1;
752                 }
753                 break;
754                 case TYPE_1_ :
755                     // repetitative suffix name comparison done here
756                     // offset is the character code - start
757                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
758                     {
759                         int offset = ch - m_rangestart_;
760                         int indexes[] = m_utilIntBuffer_;
761                         int factor;
762 
763                         // write elements according to the factors
764                         // the factorized elements are determined by modulo
765                         // arithmetic
766                         synchronized (m_utilIntBuffer_) {
767                             for (int i = m_variant_ - 1; i > 0; i --)
768                             {
769                                 factor = m_factor_[i] & 0x00FF;
770                                 indexes[i] = offset % factor;
771                                 offset /= factor;
772                             }
773 
774                             // we don't need to calculate the last modulus
775                             // because start <= code <= end guarantees here that
776                             // code <= factors[0]
777                             indexes[0] = offset;
778 
779                             // joining up the factorized strings
780                             if (compareFactorString(indexes, m_variant_, name,
781                                                     prefixlen)) {
782                                 return ch;
783                             }
784                         }
785                     }
786             }
787 
788             return -1;
789         }
790 
791         /**
792          * Adds all chars in the set of algorithmic names into the set.
793          * Equivalent to part of calcAlgNameSetsLengths.
794          * @param set int set to add the chars of the algorithm names into
795          * @param maxlength maximum length to compare to
796          * @return the length that is either maxlength of the length of this
797          *         algorithm name if it is longer than maxlength
798          */
add(int set[], int maxlength)799         int add(int set[], int maxlength)
800         {
801             // prefix length
802             int length = UCharacterName.add(set, m_prefix_);
803             switch (m_type_) {
804                 case TYPE_0_ : {
805                     // name = prefix + (range->variant times) hex-digits
806                     // prefix
807                     length += m_variant_;
808                     /* synwee to check
809                      * addString(set, (const char *)(range + 1))
810                                        + range->variant;*/
811                     break;
812                 }
813                 case TYPE_1_ : {
814                     // name = prefix factorized-elements
815                     // get the set and maximum factor suffix length for each
816                     // factor
817                     for (int i = m_variant_ - 1; i > 0; i --)
818                     {
819                         int maxfactorlength = 0;
820                         int count = 0;
821                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
822                             synchronized (m_utilStringBuffer_) {
823                                 m_utilStringBuffer_.setLength(0);
824                                 count
825                                   = UCharacterUtility.getNullTermByteSubString(
826                                                 m_utilStringBuffer_,
827                                                 m_factorstring_, count);
828                                 UCharacterName.add(set, m_utilStringBuffer_);
829                                 if (m_utilStringBuffer_.length()
830                                                             > maxfactorlength)
831                                 {
832                                     maxfactorlength
833                                                 = m_utilStringBuffer_.length();
834                                 }
835                             }
836                         }
837                         length += maxfactorlength;
838                     }
839                 }
840             }
841             if (length > maxlength) {
842                 return length;
843             }
844             return maxlength;
845         }
846 
847         // private data members ------------------------------------------
848 
849         /**
850         * Algorithmic data information
851         */
852         private int m_rangestart_;
853         private int m_rangeend_;
854         private byte m_type_;
855         private byte m_variant_;
856         private char m_factor_[];
857         private String m_prefix_;
858         private byte m_factorstring_[];
859         /**
860          * Utility StringBuffer
861          */
862         private StringBuffer m_utilStringBuffer_ = new StringBuffer();
863         /**
864          * Utility int buffer
865          */
866         private int m_utilIntBuffer_[] = new int[256];
867 
868         // private methods -----------------------------------------------
869 
870         /**
871         * Gets the indexth string in each of the argument factor block
872         * @param index array with each index corresponding to each factor block
873         * @param length length of the array index
874         * @return the combined string of the array of indexth factor string in
875         *         factor block
876         */
getFactorString(int index[], int length)877         private String getFactorString(int index[], int length)
878         {
879             int size = m_factor_.length;
880             if (index == null || length != size) {
881                 return null;
882             }
883 
884             synchronized (m_utilStringBuffer_) {
885                 m_utilStringBuffer_.setLength(0);
886                 int count = 0;
887                 int factor;
888                 size --;
889                 for (int i = 0; i <= size; i ++) {
890                     factor = m_factor_[i];
891                     count = UCharacterUtility.skipNullTermByteSubString(
892                                              m_factorstring_, count, index[i]);
893                     count = UCharacterUtility.getNullTermByteSubString(
894                                           m_utilStringBuffer_, m_factorstring_,
895                                           count);
896                     if (i != size) {
897                         count = UCharacterUtility.skipNullTermByteSubString(
898                                                        m_factorstring_, count,
899                                                        factor - index[i] - 1);
900                     }
901                 }
902                 return m_utilStringBuffer_.toString();
903             }
904         }
905 
906         /**
907         * Compares the indexth string in each of the argument factor block with
908         * the argument string
909         * @param index array with each index corresponding to each factor block
910         * @param length index array length
911         * @param str string to compare with
912         * @param offset of str to start comparison
913         * @return true if string matches
914         */
compareFactorString(int index[], int length, String str, int offset)915         private boolean compareFactorString(int index[], int length, String str,
916                                             int offset)
917         {
918             int size = m_factor_.length;
919             if (index == null || length != size)
920                 return false;
921 
922             int count = 0;
923             int strcount = offset;
924             int factor;
925             size --;
926             for (int i = 0; i <= size; i ++)
927             {
928                 factor = m_factor_[i];
929                 count = UCharacterUtility.skipNullTermByteSubString(
930                                           m_factorstring_, count, index[i]);
931                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
932                                           m_factorstring_, strcount, count);
933                 if (strcount < 0) {
934                     return false;
935                 }
936 
937                 if (i != size) {
938                     count = UCharacterUtility.skipNullTermByteSubString(
939                                   m_factorstring_, count, factor - index[i]);
940                 }
941             }
942             if (strcount != str.length()) {
943                 return false;
944             }
945             return true;
946         }
947     }
948 
949     // package private data members --------------------------------------
950 
951     /**
952      * Size of each groups
953      */
954     int m_groupsize_ = 0;
955 
956     // package private methods --------------------------------------------
957 
958     /**
959     * Sets the token data
960     * @param token array of tokens
961     * @param tokenstring array of string values of the tokens
962     * @return false if there is a data error
963     */
setToken(char token[], byte tokenstring[])964     boolean setToken(char token[], byte tokenstring[])
965     {
966         if (token != null && tokenstring != null && token.length > 0 &&
967             tokenstring.length > 0) {
968             m_tokentable_ = token;
969             m_tokenstring_ = tokenstring;
970             return true;
971         }
972         return false;
973     }
974 
975     /**
976     * Set the algorithm name information array
977     * @param alg Algorithm information array
978     * @return true if the group string offset has been set correctly
979     */
setAlgorithm(AlgorithmName alg[])980     boolean setAlgorithm(AlgorithmName alg[])
981     {
982         if (alg != null && alg.length != 0) {
983             m_algorithm_ = alg;
984             return true;
985         }
986         return false;
987     }
988 
989     /**
990     * Sets the number of group and size of each group in number of char
991     * @param count number of groups
992     * @param size size of group in char
993     * @return true if group size is set correctly
994     */
setGroupCountSize(int count, int size)995     boolean setGroupCountSize(int count, int size)
996     {
997         if (count <= 0 || size <= 0) {
998             return false;
999         }
1000         m_groupcount_ = count;
1001         m_groupsize_ = size;
1002         return true;
1003     }
1004 
1005     /**
1006     * Sets the group name data
1007     * @param group index information array
1008     * @param groupstring name information array
1009     * @return false if there is a data error
1010     */
setGroup(char group[], byte groupstring[])1011     boolean setGroup(char group[], byte groupstring[])
1012     {
1013         if (group != null && groupstring != null && group.length > 0 &&
1014             groupstring.length > 0) {
1015             m_groupinfo_ = group;
1016             m_groupstring_ = groupstring;
1017             return true;
1018         }
1019         return false;
1020     }
1021 
1022     // private data members ----------------------------------------------
1023 
1024     /**
1025     * Data used in unames.icu
1026     */
1027     private char m_tokentable_[];
1028     private byte m_tokenstring_[];
1029     private char m_groupinfo_[];
1030     private byte m_groupstring_[];
1031     private AlgorithmName m_algorithm_[];
1032 
1033     /**
1034     * Group use.  Note - access must be synchronized.
1035     */
1036     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1037     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1038 
1039     /**
1040     * Default name of the name datafile
1041     */
1042     private static final String FILE_NAME_ = "unames.icu";
1043     /**
1044     * Shift count to retrieve group information
1045     */
1046     private static final int GROUP_SHIFT_ = 5;
1047     /**
1048     * Mask to retrieve the offset for a particular character within a group
1049     */
1050     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1051 
1052     /**
1053     * Position of offsethigh in group information array
1054     */
1055     private static final int OFFSET_HIGH_OFFSET_ = 1;
1056 
1057     /**
1058     * Position of offsetlow in group information array
1059     */
1060     private static final int OFFSET_LOW_OFFSET_ = 2;
1061     /**
1062     * Double nibble indicator, any nibble > this number has to be combined
1063     * with its following nibble
1064     */
1065     private static final int SINGLE_NIBBLE_MAX_ = 11;
1066 
1067     /*
1068      * Maximum length of character names (regular & 1.0).
1069      */
1070     //private static int MAX_NAME_LENGTH_ = 0;
1071     /*
1072      * Maximum length of ISO comments.
1073      */
1074     //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1075 
1076     /**
1077      * Set of chars used in character names (regular & 1.0).
1078      * Chars are platform-dependent (can be EBCDIC).
1079      */
1080     private int m_nameSet_[] = new int[8];
1081     /**
1082      * Set of chars used in ISO comments. (regular & 1.0).
1083      * Chars are platform-dependent (can be EBCDIC).
1084      */
1085     private int m_ISOCommentSet_[] = new int[8];
1086     /**
1087      * Utility StringBuffer
1088      */
1089     private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1090     /**
1091      * Utility int buffer
1092      */
1093     private int m_utilIntBuffer_[] = new int[2];
1094     /**
1095      * Maximum ISO comment length
1096      */
1097     private int m_maxISOCommentLength_;
1098     /**
1099      * Maximum name length
1100      */
1101     private int m_maxNameLength_;
1102     /**
1103      * Type names used for extended names
1104      */
1105     private static final String TYPE_NAMES_[] = {"unassigned",
1106                                                  "uppercase letter",
1107                                                  "lowercase letter",
1108                                                  "titlecase letter",
1109                                                  "modifier letter",
1110                                                  "other letter",
1111                                                  "non spacing mark",
1112                                                  "enclosing mark",
1113                                                  "combining spacing mark",
1114                                                  "decimal digit number",
1115                                                  "letter number",
1116                                                  "other number",
1117                                                  "space separator",
1118                                                  "line separator",
1119                                                  "paragraph separator",
1120                                                  "control",
1121                                                  "format",
1122                                                  "private use area",
1123                                                  "surrogate",
1124                                                  "dash punctuation",
1125                                                  "start punctuation",
1126                                                  "end punctuation",
1127                                                  "connector punctuation",
1128                                                  "other punctuation",
1129                                                  "math symbol",
1130                                                  "currency symbol",
1131                                                  "modifier symbol",
1132                                                  "other symbol",
1133                                                  "initial punctuation",
1134                                                  "final punctuation",
1135                                                  "noncharacter",
1136                                                  "lead surrogate",
1137                                                  "trail surrogate"};
1138     /**
1139      * Unknown type name
1140      */
1141     private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1142     /**
1143      * Not a character type
1144      */
1145     private static final int NON_CHARACTER_
1146                                     = UCharacterCategory.CHAR_CATEGORY_COUNT;
1147     /**
1148     * Lead surrogate type
1149     */
1150     private static final int LEAD_SURROGATE_
1151                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1152     /**
1153     * Trail surrogate type
1154     */
1155     private static final int TRAIL_SURROGATE_
1156                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1157     /**
1158     * Extended category count
1159     */
1160     static final int EXTENDED_CATEGORY_
1161                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1162 
1163     // private constructor ------------------------------------------------
1164 
1165     /**
1166     * <p>Protected constructor for use in UCharacter.</p>
1167     * @exception IOException thrown when data reading fails
1168     */
UCharacterName()1169     private UCharacterName() throws IOException
1170     {
1171         ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
1172         UCharacterNameReader reader = new UCharacterNameReader(b);
1173         reader.read(this);
1174     }
1175 
1176     // private methods ---------------------------------------------------
1177 
1178     /**
1179     * Gets the algorithmic name for the argument character
1180     * @param ch character to determine name for
1181     * @param choice name choice
1182     * @return the algorithmic name or null if not found
1183     */
getAlgName(int ch, int choice)1184     private String getAlgName(int ch, int choice)
1185     {
1186         /* Only the normative character name can be algorithmic. */
1187         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
1188             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
1189         ) {
1190             // index in terms integer index
1191             synchronized (m_utilStringBuffer_) {
1192                 m_utilStringBuffer_.setLength(0);
1193 
1194                 for (int index = m_algorithm_.length - 1; index >= 0; index --)
1195                 {
1196                    if (m_algorithm_[index].contains(ch)) {
1197                       m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1198                       return m_utilStringBuffer_.toString();
1199                    }
1200                 }
1201             }
1202         }
1203         return null;
1204     }
1205 
1206     /**
1207     * Getting the character with the tokenized argument name
1208     * @param name of the character
1209     * @return character with the tokenized argument name or -1 if character
1210     *         is not found
1211     */
getGroupChar(String name, int choice)1212     private synchronized int getGroupChar(String name, int choice)
1213     {
1214         for (int i = 0; i < m_groupcount_; i ++) {
1215             // populating the data set of grouptable
1216 
1217             int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1218                                                   m_grouplengths_);
1219 
1220             // shift out to function
1221             int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1222                                       choice);
1223             if (result != -1) {
1224                 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1225                          | result;
1226             }
1227         }
1228         return -1;
1229     }
1230 
1231     /**
1232     * Compares and retrieve character if name is found within the argument
1233     * group
1234     * @param index index where the set of names reside in the group block
1235     * @param length list of lengths of the strings
1236     * @param name character name to search for
1237     * @param choice of either 1.0 or the most current unicode name
1238     * @return relative character in the group which matches name, otherwise if
1239     *         not found, -1 will be returned
1240     */
getGroupChar(int index, char length[], String name, int choice)1241     private int getGroupChar(int index, char length[], String name,
1242                              int choice)
1243     {
1244         byte b = 0;
1245         char token;
1246         int len;
1247         int namelen = name.length();
1248         int nindex;
1249         int count;
1250 
1251         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1252             nindex = 0;
1253             len = length[result];
1254 
1255             if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
1256                 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
1257             ) {
1258                 /*
1259                  * skip the modern name if it is not requested _and_
1260                  * if the semicolon byte value is a character, not a token number
1261                  */
1262                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
1263                 do {
1264                     int oldindex = index;
1265                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
1266                                                          index, len, (byte)';');
1267                     len -= (index - oldindex);
1268                 } while(--fieldIndex>0);
1269             }
1270 
1271             // number of tokens is > the length of the name
1272             // write each letter directly, and write a token word per token
1273             for (count = 0; count < len && nindex != -1 && nindex < namelen;
1274                 ) {
1275                 b = m_groupstring_[index + count];
1276                 count ++;
1277 
1278                 if (b >= m_tokentable_.length) {
1279                     if (name.charAt(nindex ++) != (b & 0xFF)) {
1280                         nindex = -1;
1281                     }
1282                 }
1283                 else {
1284                     token = m_tokentable_[b & 0xFF];
1285                     if (token == 0xFFFE) {
1286                         // this is a lead byte for a double-byte token
1287                         token = m_tokentable_[b << 8 |
1288                                    (m_groupstring_[index + count] & 0x00ff)];
1289                         count ++;
1290                     }
1291                     if (token == 0xFFFF) {
1292                         if (name.charAt(nindex ++) != (b & 0xFF)) {
1293                             nindex = -1;
1294                         }
1295                     }
1296                     else {
1297                         // compare token with name
1298                         nindex = UCharacterUtility.compareNullTermByteSubString(
1299                                         name, m_tokenstring_, nindex, token);
1300                     }
1301                 }
1302             }
1303 
1304             if (namelen == nindex &&
1305                 (count == len || m_groupstring_[index + count] == ';')) {
1306                 return result;
1307             }
1308 
1309             index += len;
1310         }
1311         return -1;
1312     }
1313 
1314     /**
1315     * Gets the character extended type
1316     * @param ch character to be tested
1317     * @return extended type it is associated with
1318     */
getType(int ch)1319     private static int getType(int ch)
1320     {
1321         if (UCharacterUtility.isNonCharacter(ch)) {
1322             // not a character we return a invalid category count
1323             return NON_CHARACTER_;
1324         }
1325         int result = UCharacter.getType(ch);
1326         if (result == UCharacterCategory.SURROGATE) {
1327             if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1328                 result = LEAD_SURROGATE_;
1329             }
1330             else {
1331                 result = TRAIL_SURROGATE_;
1332             }
1333         }
1334         return result;
1335     }
1336 
1337     /**
1338     * Getting the character with extended name of the form <....>.
1339     * @param name of the character to be found
1340     * @param choice name choice
1341     * @return character associated with the name, -1 if such character is not
1342     *                   found and -2 if we should continue with the search.
1343     */
getExtendedChar(String name, int choice)1344     private static int getExtendedChar(String name, int choice)
1345     {
1346         if (name.charAt(0) == '<') {
1347             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1348                 int endIndex = name.length() - 1;
1349                 if (name.charAt(endIndex) == '>') {
1350                     int startIndex = name.lastIndexOf('-');
1351                     if (startIndex >= 0) { // We've got a category.
1352                         startIndex ++;
1353                         int result = -1;
1354                         try {
1355                             result = Integer.parseInt(
1356                                         name.substring(startIndex, endIndex),
1357                                         16);
1358                         }
1359                         catch (NumberFormatException e) {
1360                             return -1;
1361                         }
1362                         // Now validate the category name. We could use a
1363                         // binary search, or a trie, if we really wanted to.
1364                         String type = name.substring(1, startIndex - 1);
1365                         int length = TYPE_NAMES_.length;
1366                         for (int i = 0; i < length; ++ i) {
1367                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1368                                 if (getType(result) == i) {
1369                                     return result;
1370                                 }
1371                                 break;
1372                             }
1373                         }
1374                     }
1375                 }
1376             }
1377             return -1;
1378         }
1379         return -2;
1380     }
1381 
1382     // sets of name characters, maximum name lengths -----------------------
1383 
1384     /**
1385      * Adds a codepoint into a set of ints.
1386      * Equivalent to SET_ADD.
1387      * @param set set to add to
1388      * @param ch 16 bit char to add
1389      */
add(int set[], char ch)1390     private static void add(int set[], char ch)
1391     {
1392         set[ch >>> 5] |= 1 << (ch & 0x1f);
1393     }
1394 
1395     /**
1396      * Checks if a codepoint is a part of a set of ints.
1397      * Equivalent to SET_CONTAINS.
1398      * @param set set to check in
1399      * @param ch 16 bit char to check
1400      * @return true if codepoint is part of the set, false otherwise
1401      */
contains(int set[], char ch)1402     private static boolean contains(int set[], char ch)
1403     {
1404         return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1405     }
1406 
1407     /**
1408      * Adds all characters of the argument str and gets the length
1409      * Equivalent to calcStringSetLength.
1410      * @param set set to add all chars of str to
1411      * @param str string to add
1412      */
add(int set[], String str)1413     private static int add(int set[], String str)
1414     {
1415         int result = str.length();
1416 
1417         for (int i = result - 1; i >= 0; i --) {
1418             add(set, str.charAt(i));
1419         }
1420         return result;
1421     }
1422 
1423     /**
1424      * Adds all characters of the argument str and gets the length
1425      * Equivalent to calcStringSetLength.
1426      * @param set set to add all chars of str to
1427      * @param str string to add
1428      */
add(int set[], StringBuffer str)1429     private static int add(int set[], StringBuffer str)
1430     {
1431         int result = str.length();
1432 
1433         for (int i = result - 1; i >= 0; i --) {
1434             add(set, str.charAt(i));
1435         }
1436         return result;
1437     }
1438 
1439     /**
1440      * Adds all algorithmic names into the name set.
1441      * Equivalent to part of calcAlgNameSetsLengths.
1442      * @param maxlength length to compare to
1443      * @return the maximum length of any possible algorithmic name if it is >
1444      *         maxlength, otherwise maxlength is returned.
1445      */
addAlgorithmName(int maxlength)1446     private int addAlgorithmName(int maxlength)
1447     {
1448         int result = 0;
1449         for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1450             result = m_algorithm_[i].add(m_nameSet_, maxlength);
1451             if (result > maxlength) {
1452                 maxlength = result;
1453             }
1454         }
1455         return maxlength;
1456     }
1457 
1458     /**
1459      * Adds all extended names into the name set.
1460      * Equivalent to part of calcExtNameSetsLengths.
1461      * @param maxlength length to compare to
1462      * @return the maxlength of any possible extended name.
1463      */
addExtendedName(int maxlength)1464     private int addExtendedName(int maxlength)
1465     {
1466         for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1467             // for each category, count the length of the category name
1468             // plus 9 =
1469             // 2 for <>
1470             // 1 for -
1471             // 6 for most hex digits per code point
1472             int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1473             if (length > maxlength) {
1474                 maxlength = length;
1475             }
1476         }
1477         return maxlength;
1478     }
1479 
1480     /**
1481      * Adds names of a group to the argument set.
1482      * Equivalent to calcNameSetLength.
1483      * @param offset of the group name string in byte count
1484      * @param length of the group name string
1485      * @param tokenlength array to store the length of each token
1486      * @param set to add to
1487      * @return the length of the name string and the length of the group
1488      *         string parsed
1489      */
addGroupName(int offset, int length, byte tokenlength[], int set[])1490     private int[] addGroupName(int offset, int length, byte tokenlength[],
1491                                int set[])
1492     {
1493         int resultnlength = 0;
1494         int resultplength = 0;
1495         while (resultplength < length) {
1496             char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1497             resultplength ++;
1498             if (b == ';') {
1499                 break;
1500             }
1501 
1502             if (b >= m_tokentable_.length) {
1503                 add(set, b); // implicit letter
1504                 resultnlength ++;
1505             }
1506             else {
1507                 char token = m_tokentable_[b & 0x00ff];
1508                 if (token == 0xFFFE) {
1509                     // this is a lead byte for a double-byte token
1510                     b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1511                                          & 0x00ff));
1512                     token = m_tokentable_[b];
1513                     resultplength ++;
1514                 }
1515                 if (token == 0xFFFF) {
1516                     add(set, b);
1517                     resultnlength ++;
1518                 }
1519                 else {
1520                     // count token word
1521                     // use cached token length
1522                     byte tlength = tokenlength[b];
1523                     if (tlength == 0) {
1524                         synchronized (m_utilStringBuffer_) {
1525                             m_utilStringBuffer_.setLength(0);
1526                             UCharacterUtility.getNullTermByteSubString(
1527                                            m_utilStringBuffer_, m_tokenstring_,
1528                                            token);
1529                             tlength = (byte)add(set, m_utilStringBuffer_);
1530                         }
1531                         tokenlength[b] = tlength;
1532                     }
1533                     resultnlength += tlength;
1534                 }
1535             }
1536         }
1537         m_utilIntBuffer_[0] = resultnlength;
1538         m_utilIntBuffer_[1] = resultplength;
1539         return m_utilIntBuffer_;
1540     }
1541 
1542     /**
1543      * Adds names of all group to the argument set.
1544      * Sets the data member m_max*Length_.
1545      * Method called only once.
1546      * Equivalent to calcGroupNameSetsLength.
1547      * @param maxlength length to compare to
1548      */
addGroupName(int maxlength)1549     private void addGroupName(int maxlength)
1550     {
1551         int maxisolength = 0;
1552         char offsets[] = new char[LINES_PER_GROUP_ + 2];
1553         char lengths[] = new char[LINES_PER_GROUP_ + 2];
1554         byte tokenlengths[] = new byte[m_tokentable_.length];
1555 
1556         // enumerate all groups
1557         // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1558         for (int i = 0; i < m_groupcount_ ; i ++) {
1559             int offset = getGroupLengths(i, offsets, lengths);
1560             // enumerate all lines in each group
1561             // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1562             //    linenumber --) {
1563             for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1564                 linenumber ++) {
1565                 int lineoffset = offset + offsets[linenumber];
1566                 int length = lengths[linenumber];
1567                 if (length == 0) {
1568                     continue;
1569                 }
1570 
1571                 // read regular name
1572                 int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1573                                             m_nameSet_);
1574                 if (parsed[0] > maxlength) {
1575                     // 0 for name length
1576                     maxlength = parsed[0];
1577                 }
1578                 lineoffset += parsed[1];
1579                 if (parsed[1] >= length) {
1580                     // 1 for parsed group string length
1581                     continue;
1582                 }
1583                 length -= parsed[1];
1584                 // read Unicode 1.0 name
1585                 parsed = addGroupName(lineoffset, length, tokenlengths,
1586                                       m_nameSet_);
1587                 if (parsed[0] > maxlength) {
1588                     // 0 for name length
1589                     maxlength = parsed[0];
1590                 }
1591                 lineoffset += parsed[1];
1592                 if (parsed[1] >= length) {
1593                     // 1 for parsed group string length
1594                     continue;
1595                 }
1596                 length -= parsed[1];
1597                 // read ISO comment
1598                 parsed = addGroupName(lineoffset, length, tokenlengths,
1599                                       m_ISOCommentSet_);
1600                 if (parsed[1] > maxisolength) {
1601                     maxisolength = length;
1602                 }
1603             }
1604         }
1605 
1606         // set gMax... - name length last for threading
1607         m_maxISOCommentLength_ = maxisolength;
1608         m_maxNameLength_ = maxlength;
1609     }
1610 
1611     /**
1612      * Sets up the name sets and the calculation of the maximum lengths.
1613      * Equivalent to calcNameSetsLengths.
1614      */
initNameSetsLengths()1615     private boolean initNameSetsLengths()
1616     {
1617         if (m_maxNameLength_ > 0) {
1618             return true;
1619         }
1620 
1621         String extra = "0123456789ABCDEF<>-";
1622         // set hex digits, used in various names, and <>-, used in extended
1623         // names
1624         for (int i = extra.length() - 1; i >= 0; i --) {
1625             add(m_nameSet_, extra.charAt(i));
1626         }
1627 
1628         // set sets and lengths from algorithmic names
1629         m_maxNameLength_ = addAlgorithmName(0);
1630         // set sets and lengths from extended names
1631         m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1632         // set sets and lengths from group names, set global maximum values
1633         addGroupName(m_maxNameLength_);
1634         return true;
1635     }
1636 
1637     /**
1638      * Converts the char set cset into a Unicode set uset.
1639      * Equivalent to charSetToUSet.
1640      * @param set Set of 256 bit flags corresponding to a set of chars.
1641      * @param uset USet to receive characters. Existing contents are deleted.
1642      */
convert(int set[], UnicodeSet uset)1643     private void convert(int set[], UnicodeSet uset)
1644     {
1645         uset.clear();
1646         if (!initNameSetsLengths()) {
1647             return;
1648         }
1649 
1650         // build a char string with all chars that are used in character names
1651         for (char c = 255; c > 0; c --) {
1652             if (contains(set, c)) {
1653                 uset.add(c);
1654             }
1655         }
1656     }
1657 }
1658