• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.IOException;
14 import java.nio.ByteBuffer;
15 import java.util.Iterator;
16 import java.util.MissingResourceException;
17 
18 import ohos.global.icu.lang.UCharacter;
19 import ohos.global.icu.lang.UCharacter.HangulSyllableType;
20 import ohos.global.icu.lang.UCharacter.NumericType;
21 import ohos.global.icu.lang.UCharacterCategory;
22 import ohos.global.icu.lang.UProperty;
23 import ohos.global.icu.lang.UScript;
24 import ohos.global.icu.text.Normalizer2;
25 import ohos.global.icu.text.UTF16;
26 import ohos.global.icu.text.UnicodeSet;
27 import ohos.global.icu.util.CodePointMap;
28 import ohos.global.icu.util.CodePointTrie;
29 import ohos.global.icu.util.ICUException;
30 import ohos.global.icu.util.ICUUncheckedIOException;
31 import ohos.global.icu.util.VersionInfo;
32 
/**
 * <p>Internal class used for the Unicode character property database.</p>
 * <p>This class stores binary data read from uprops.icu.
 * It does not have the capability to parse the data into more high-level
 * information. It only returns bytes of information when required.</p>
 * <p>Due to the form most commonly used for retrieval, an array of char is used
 * to store the binary data.</p>
 * <p>UCharacterProperty also contains information on accessing indexes to
 * significant points in the binary data.</p>
 * <p>Responsibility for molding the binary data into a more meaningful form lies
 * with <a href="UCharacter.html">UCharacter</a>.</p>
 * @author Syn Wee Quek
 * @hide exposed on OHOS
 */
47 
48 public final class UCharacterProperty
49 {
50     // public data members -----------------------------------------------
51 
52     /*
53      * public singleton instance
54      */
55     public static final UCharacterProperty INSTANCE;
56 
57     /**
58     * Trie data
59     */
60     public Trie2_16 m_trie_;
61     /**
62     * Unicode version
63     */
64     public VersionInfo m_unicodeVersion_;
65     /**
66     * Latin capital letter i with dot above
67     */
68     public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130;
69     /**
70     * Latin small letter i with dot above
71     */
72     public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131;
73     /**
74     * Latin lowercase i
75     */
76     public static final char LATIN_SMALL_LETTER_I_ = 0x69;
77     /**
78     * Character type mask
79     */
80     public static final int TYPE_MASK = 0x1F;
81 
82     // uprops.h enum UPropertySource --------------------------------------- ***
83 
84     /** No source, not a supported property. */
85     public static final int SRC_NONE=0;
86     /** From uchar.c/uprops.icu main trie */
87     public static final int SRC_CHAR=1;
88     /** From uchar.c/uprops.icu properties vectors trie */
89     public static final int SRC_PROPSVEC=2;
90     /** From unames.c/unames.icu */
91     public static final int SRC_NAMES=3;
92     /** From ucase.c/ucase.icu */
93     public static final int SRC_CASE=4;
94     /** From ubidi_props.c/ubidi.icu */
95     public static final int SRC_BIDI=5;
96     /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
97     public static final int SRC_CHAR_AND_PROPSVEC=6;
98     /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
99     public static final int SRC_CASE_AND_NORM=7;
100     /** From normalizer2impl.cpp/nfc.nrm */
101     public static final int SRC_NFC=8;
102     /** From normalizer2impl.cpp/nfkc.nrm */
103     public static final int SRC_NFKC=9;
104     /** From normalizer2impl.cpp/nfkc_cf.nrm */
105     public static final int SRC_NFKC_CF=10;
106     /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
107     public static final int SRC_NFC_CANON_ITER=11;
108     // Text layout properties.
109     public static final int SRC_INPC=12;
110     public static final int SRC_INSC=13;
111     public static final int SRC_VO=14;
112     /** One more than the highest UPropertySource (SRC_) constant. */
113     public static final int SRC_COUNT=15;
114 
115     private static final class LayoutProps {
116         private static final class IsAcceptable implements ICUBinary.Authenticate {
117             @Override
isDataVersionAcceptable(byte version[])118             public boolean isDataVersionAcceptable(byte version[]) {
119                 return version[0] == 1;
120             }
121         }
122         private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
123         private static final int DATA_FORMAT = 0x4c61796f;  // "Layo"
124 
125         // indexes into indexes[]
126         // Element 0 stores the length of the indexes[] array.
127         //ivate static final int IX_INDEXES_LENGTH = 0;
128         // Elements 1..7 store the tops of consecutive code point tries.
129         // No trie is stored if the difference between two of these is less than 16.
130         private static final int IX_INPC_TRIE_TOP = 1;
131         private static final int IX_INSC_TRIE_TOP = 2;
132         private static final int IX_VO_TRIE_TOP = 3;
133         //ivate static final int IX_RESERVED_TOP = 4;
134 
135         //ivate static final int IX_TRIES_TOP = 7;
136 
137         private static final int IX_MAX_VALUES = 9;
138 
139         // Length of indexes[]. Multiple of 4 to 16-align the tries.
140         //ivate static final int IX_COUNT = 12;
141 
142         private static final int MAX_INPC_SHIFT = 24;
143         private static final int MAX_INSC_SHIFT = 16;
144         private static final int MAX_VO_SHIFT = 8;
145 
146         static final LayoutProps INSTANCE = new LayoutProps();
147 
148         CodePointTrie inpcTrie = null;  // Indic_Positional_Category
149         CodePointTrie inscTrie = null;  // Indic_Syllabic_Category
150         CodePointTrie voTrie = null;  // Vertical_Orientation
151 
152         int maxInpcValue = 0;
153         int maxInscValue = 0;
154         int maxVoValue = 0;
155 
LayoutProps()156         LayoutProps() {
157             ByteBuffer bytes = ICUBinary.getRequiredData("ulayout.icu");
158             try {
159                 ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
160                 int startPos = bytes.position();
161                 int indexesLength = bytes.getInt();  // inIndexes[IX_INDEXES_LENGTH]
162                 if (indexesLength < 12) {
163                     throw new ICUUncheckedIOException(
164                             "Text layout properties data: not enough indexes");
165                 }
166                 int[] inIndexes = new int[indexesLength];
167                 inIndexes[0] = indexesLength;
168                 for (int i = 1; i < indexesLength; ++i) {
169                     inIndexes[i] = bytes.getInt();
170                 }
171 
172                 int offset = indexesLength * 4;
173                 int top = inIndexes[IX_INPC_TRIE_TOP];
174                 int trieSize = top - offset;
175                 if (trieSize >= 16) {
176                     inpcTrie = CodePointTrie.fromBinary(null, null, bytes);
177                 }
178                 int pos = bytes.position() - startPos;
179                 assert top >= pos;
180                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
181                 offset = top;
182                 top = inIndexes[IX_INSC_TRIE_TOP];
183                 trieSize = top - offset;
184                 if (trieSize >= 16) {
185                     inscTrie = CodePointTrie.fromBinary(null, null, bytes);
186                 }
187                 pos = bytes.position() - startPos;
188                 assert top >= pos;
189                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
190                 offset = top;
191                 top = inIndexes[IX_VO_TRIE_TOP];
192                 trieSize = top - offset;
193                 if (trieSize >= 16) {
194                     voTrie = CodePointTrie.fromBinary(null, null, bytes);
195                 }
196                 pos = bytes.position() - startPos;
197                 assert top >= pos;
198                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
199 
200                 int maxValues = inIndexes[IX_MAX_VALUES];
201                 maxInpcValue = maxValues >>> MAX_INPC_SHIFT;
202                 maxInscValue = (maxValues >> MAX_INSC_SHIFT) & 0xff;
203                 maxVoValue = (maxValues >> MAX_VO_SHIFT) & 0xff;
204             } catch(IOException e) {
205                 throw new ICUUncheckedIOException(e);
206             }
207         }
208 
addPropertyStarts(int src, UnicodeSet set)209         public UnicodeSet addPropertyStarts(int src, UnicodeSet set) {
210             CodePointTrie trie;
211             switch (src) {
212             case SRC_INPC:
213                 trie = inpcTrie;
214                 break;
215             case SRC_INSC:
216                 trie = inscTrie;
217                 break;
218             case SRC_VO:
219                 trie = voTrie;
220                 break;
221             default:
222                 throw new IllegalStateException();
223             }
224 
225             if (trie == null) {
226                 throw new MissingResourceException(
227                         "no data for one of the text layout properties; src=" + src,
228                         "LayoutProps", "");
229             }
230 
231             // Add the start code point of each same-value range of the trie.
232             CodePointMap.Range range = new CodePointMap.Range();
233             int start = 0;
234             while (trie.getRange(start, null, range)) {
235                 set.add(start);
236                 start = range.getEnd() + 1;
237             }
238             return set;
239         }
240     }
241 
242     // public methods ----------------------------------------------------
243 
244     /**
245     * Gets the main property value for code point ch.
246     * @param ch code point whose property value is to be retrieved
247     * @return property value of code point
248     */
getProperty(int ch)249     public final int getProperty(int ch)
250     {
251         return m_trie_.get(ch);
252     }
253 
254     /**
255      * Gets the unicode additional properties.
256      * Java version of C u_getUnicodeProperties().
257      * @param codepoint codepoint whose additional properties is to be
258      *                  retrieved
259      * @param column The column index.
260      * @return unicode properties
261      */
getAdditional(int codepoint, int column)262     public int getAdditional(int codepoint, int column) {
263         assert column >= 0;
264         if (column >= m_additionalColumnsCount_) {
265             return 0;
266         }
267         return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
268     }
269 
270     static final int MY_MASK = UCharacterProperty.TYPE_MASK
271         & ((1<<UCharacterCategory.UPPERCASE_LETTER) |
272             (1<<UCharacterCategory.LOWERCASE_LETTER) |
273             (1<<UCharacterCategory.TITLECASE_LETTER) |
274             (1<<UCharacterCategory.MODIFIER_LETTER) |
275             (1<<UCharacterCategory.OTHER_LETTER));
276 
277 
278        /**
279      * <p>Get the "age" of the code point.</p>
280      * <p>The "age" is the Unicode version when the code point was first
281      * designated (as a non-character or for Private Use) or assigned a
282      * character.</p>
283      * <p>This can be useful to avoid emitting code points to receiving
284      * processes that do not accept newer characters.</p>
285      * <p>The data is from the UCD file DerivedAge.txt.</p>
286      * <p>This API does not check the validity of the codepoint.</p>
287      * @param codepoint The code point.
288      * @return the Unicode version number
289      */
getAge(int codepoint)290     public VersionInfo getAge(int codepoint)
291     {
292         int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
293         return VersionInfo.getInstance(
294                            (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
295                            version & LAST_NIBBLE_MASK_, 0, 0);
296     }
297 
298     private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
299     private static final int GC_CC_MASK = getMask(UCharacter.CONTROL);
300     private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE);
301     private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR);
302     private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR);
303     private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR);
304     /** Mask constant for multiple UCharCategory bits (Z Separators). */
305     private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK;
306 
307     /**
308      * Checks if c is in
309      * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
310      * with space=\p{Whitespace} and Control=Cc.
311      * Implements UCHAR_POSIX_GRAPH.
312      * @hide draft / provisional / internal are hidden on OHOS
313      */
isgraphPOSIX(int c)314     private static final boolean isgraphPOSIX(int c) {
315         /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
316         /* comparing ==0 returns FALSE for the categories mentioned */
317         return (getMask(UCharacter.getType(c))&
318                 (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK))
319                ==0;
320     }
321 
322     // binary properties --------------------------------------------------- ***
323 
324     private class BinaryProperty {
325         int column;  // SRC_PROPSVEC column, or "source" if mask==0
326         int mask;
BinaryProperty(int column, int mask)327         BinaryProperty(int column, int mask) {
328             this.column=column;
329             this.mask=mask;
330         }
BinaryProperty(int source)331         BinaryProperty(int source) {
332             this.column=source;
333             this.mask=0;
334         }
getSource()335         final int getSource() {
336             return mask==0 ? column : SRC_PROPSVEC;
337         }
contains(int c)338         boolean contains(int c) {
339             // systematic, directly stored properties
340             return (getAdditional(c, column)&mask)!=0;
341         }
342     }
343 
344     private class CaseBinaryProperty extends BinaryProperty {  // case mapping properties
345         int which;
CaseBinaryProperty(int which)346         CaseBinaryProperty(int which) {
347             super(SRC_CASE);
348             this.which=which;
349         }
350         @Override
contains(int c)351         boolean contains(int c) {
352             return UCaseProps.INSTANCE.hasBinaryProperty(c, which);
353         }
354     }
355 
356     private class NormInertBinaryProperty extends BinaryProperty {  // UCHAR_NF*_INERT properties
357         int which;
NormInertBinaryProperty(int source, int which)358         NormInertBinaryProperty(int source, int which) {
359             super(source);
360             this.which=which;
361         }
362         @Override
contains(int c)363         boolean contains(int c) {
364             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c);
365         }
366     }
367 
368     BinaryProperty[] binProps={
369         /*
370          * Binary-property implementations must be in order of corresponding UProperty,
371          * and there must be exactly one entry per binary UProperty.
372          */
373         new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)),
374         new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)),
375         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_CONTROL
376             @Override
377             boolean contains(int c) {
378                 return UBiDiProps.INSTANCE.isBidiControl(c);
379             }
380         },
381         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_MIRRORED
382             @Override
383             boolean contains(int c) {
384                 return UBiDiProps.INSTANCE.isMirrored(c);
385             }
386         },
387         new BinaryProperty(1, (1<<DASH_PROPERTY_)),
388         new BinaryProperty(1, (1<<DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_)),
389         new BinaryProperty(1, (1<<DEPRECATED_PROPERTY_)),
390         new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)),
391         new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)),
392         new BinaryProperty(SRC_NFC) {  // UCHAR_FULL_COMPOSITION_EXCLUSION
393             @Override
394             boolean contains(int c) {
395                 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
396                 Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl;
397                 return impl.isCompNo(impl.getNorm16(c));
398             }
399         },
400         new BinaryProperty(1, (1<<GRAPHEME_BASE_PROPERTY_)),
401         new BinaryProperty(1, (1<<GRAPHEME_EXTEND_PROPERTY_)),
402         new BinaryProperty(1, (1<<GRAPHEME_LINK_PROPERTY_)),
403         new BinaryProperty(1, (1<<HEX_DIGIT_PROPERTY_)),
404         new BinaryProperty(1, (1<<HYPHEN_PROPERTY_)),
405         new BinaryProperty(1, (1<<ID_CONTINUE_PROPERTY_)),
406         new BinaryProperty(1, (1<<ID_START_PROPERTY_)),
407         new BinaryProperty(1, (1<<IDEOGRAPHIC_PROPERTY_)),
408         new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)),
409         new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)),
410         new BinaryProperty(SRC_BIDI) {  // UCHAR_JOIN_CONTROL
411             @Override
412             boolean contains(int c) {
413                 return UBiDiProps.INSTANCE.isJoinControl(c);
414             }
415         },
416         new BinaryProperty(1, (1<<LOGICAL_ORDER_EXCEPTION_PROPERTY_)),
417         new CaseBinaryProperty(UProperty.LOWERCASE),
418         new BinaryProperty(1, (1<<MATH_PROPERTY_)),
419         new BinaryProperty(1, (1<<NONCHARACTER_CODE_POINT_PROPERTY_)),
420         new BinaryProperty(1, (1<<QUOTATION_MARK_PROPERTY_)),
421         new BinaryProperty(1, (1<<RADICAL_PROPERTY_)),
422         new CaseBinaryProperty(UProperty.SOFT_DOTTED),
423         new BinaryProperty(1, (1<<TERMINAL_PUNCTUATION_PROPERTY_)),
424         new BinaryProperty(1, (1<<UNIFIED_IDEOGRAPH_PROPERTY_)),
425         new CaseBinaryProperty(UProperty.UPPERCASE),
426         new BinaryProperty(1, (1<<WHITE_SPACE_PROPERTY_)),
427         new BinaryProperty(1, (1<<XID_CONTINUE_PROPERTY_)),
428         new BinaryProperty(1, (1<<XID_START_PROPERTY_)),
429         new CaseBinaryProperty(UProperty.CASE_SENSITIVE),
430         new BinaryProperty(1, (1<<S_TERM_PROPERTY_)),
431         new BinaryProperty(1, (1<<VARIATION_SELECTOR_PROPERTY_)),
432         new NormInertBinaryProperty(SRC_NFC, UProperty.NFD_INERT),
433         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKD_INERT),
434         new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT),
435         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT),
436         new BinaryProperty(SRC_NFC_CANON_ITER) {  // UCHAR_SEGMENT_STARTER
437             @Override
438             boolean contains(int c) {
439                 return Norm2AllModes.getNFCInstance().impl.
440                     ensureCanonIterData().isCanonSegmentStarter(c);
441             }
442         },
443         new BinaryProperty(1, (1<<PATTERN_SYNTAX)),
444         new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)),
445         new BinaryProperty(SRC_CHAR_AND_PROPSVEC) {  // UCHAR_POSIX_ALNUM
446             @Override
447             boolean contains(int c) {
448                 return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c);
449             }
450         },
451         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_BLANK
452             @Override
453             boolean contains(int c) {
454                 // "horizontal space"
455                 if(c<=0x9f) {
456                     return c==9 || c==0x20; /* TAB or SPACE */
457                 } else {
458                     /* Zs */
459                     return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR;
460                 }
461             }
462         },
463         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_GRAPH
464             @Override
465             boolean contains(int c) {
466                 return isgraphPOSIX(c);
467             }
468         },
469         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_PRINT
470             @Override
471             boolean contains(int c) {
472                 /*
473                  * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
474                  *
475                  * The only cntrl character in graph+blank is TAB (in blank).
476                  * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
477                  */
478                 return (UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(c);
479             }
480         },
481         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_XDIGIT
482             @Override
483             boolean contains(int c) {
484                 /* check ASCII and Fullwidth ASCII a-fA-F */
485                 if(
486                     (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
487                     (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
488                 ) {
489                     return true;
490                 }
491                 return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER;
492             }
493         },
494         new CaseBinaryProperty(UProperty.CASED),
495         new CaseBinaryProperty(UProperty.CASE_IGNORABLE),
496         new CaseBinaryProperty(UProperty.CHANGES_WHEN_LOWERCASED),
497         new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED),
498         new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED),
499         new BinaryProperty(SRC_CASE_AND_NORM) {  // UCHAR_CHANGES_WHEN_CASEFOLDED
500             @Override
501             boolean contains(int c) {
502                 String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c);
503                 if(nfd!=null) {
504                     /* c has a decomposition */
505                     c=nfd.codePointAt(0);
506                     if(Character.charCount(c)!=nfd.length()) {
507                         /* multiple code points */
508                         c=-1;
509                     }
510                 } else if(c<0) {
511                     return false;  /* protect against bad input */
512                 }
513                 if(c>=0) {
514                     /* single code point */
515                     UCaseProps csp=UCaseProps.INSTANCE;
516                     UCaseProps.dummyStringBuilder.setLength(0);
517                     return csp.toFullFolding(c, UCaseProps.dummyStringBuilder,
518                                              UCharacter.FOLD_CASE_DEFAULT)>=0;
519                 } else {
520                     String folded=UCharacter.foldCase(nfd, true);
521                     return !folded.equals(nfd);
522                 }
523             }
524         },
525         new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED),
526         new BinaryProperty(SRC_NFKC_CF) {  // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
527             @Override
528             boolean contains(int c) {
529                 Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl;
530                 String src=UTF16.valueOf(c);
531                 StringBuilder dest=new StringBuilder();
532                 // Small destCapacity for NFKC_CF(c).
533                 Normalizer2Impl.ReorderingBuffer buffer=new Normalizer2Impl.ReorderingBuffer(kcf, dest, 5);
534                 kcf.compose(src, 0, src.length(), false, true, buffer);
535                 return !Normalizer2Impl.UTF16Plus.equal(dest, src);
536             }
537         },
538         new BinaryProperty(2, 1<<PROPS_2_EMOJI),
539         new BinaryProperty(2, 1<<PROPS_2_EMOJI_PRESENTATION),
540         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER),
541         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER_BASE),
542         new BinaryProperty(2, 1<<PROPS_2_EMOJI_COMPONENT),
543         new BinaryProperty(SRC_PROPSVEC) {  // REGIONAL_INDICATOR
544             // Property starts are a subset of lb=RI etc.
545             @Override
546             boolean contains(int c) {
547                 return 0x1F1E6<=c && c<=0x1F1FF;
548             }
549         },
550         new BinaryProperty(1, 1<<PREPENDED_CONCATENATION_MARK),
551         new BinaryProperty(2, 1<<PROPS_2_EXTENDED_PICTOGRAPHIC),
552     };
553 
hasBinaryProperty(int c, int which)554     public boolean hasBinaryProperty(int c, int which) {
555          if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) {
556             // not a known binary property
557             return false;
558         } else {
559             return binProps[which].contains(c);
560         }
561     }
562 
563     // int-value and enumerated properties --------------------------------- ***
564 
getType(int c)565     public int getType(int c) {
566         return getProperty(c)&TYPE_MASK;
567     }
568 
569     /*
570      * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
571      * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
572      */
573     private static final int /* UHangulSyllableType */ gcbToHst[]={
574         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
575         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
576         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
577         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
578         HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
579         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
580         HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
581         HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
582         HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
583         HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
584         /*
585          * Omit GCB values beyond what we need for hst.
586          * The code below checks for the array length.
587          */
588     };
589 
590     private class IntProperty {
591         int column;  // SRC_PROPSVEC column, or "source" if mask==0
592         int mask;
593         int shift;
IntProperty(int column, int mask, int shift)594         IntProperty(int column, int mask, int shift) {
595             this.column=column;
596             this.mask=mask;
597             this.shift=shift;
598         }
IntProperty(int source)599         IntProperty(int source) {
600             this.column=source;
601             this.mask=0;
602         }
getSource()603         final int getSource() {
604             return mask==0 ? column : SRC_PROPSVEC;
605         }
getValue(int c)606         int getValue(int c) {
607             // systematic, directly stored properties
608             return (getAdditional(c, column)&mask)>>>shift;
609         }
getMaxValue(int which)610         int getMaxValue(int which) {
611             return (getMaxValues(column)&mask)>>>shift;
612         }
613     }
614 
615     private class BiDiIntProperty extends IntProperty {
BiDiIntProperty()616         BiDiIntProperty() {
617             super(SRC_BIDI);
618         }
619         @Override
getMaxValue(int which)620         int getMaxValue(int which) {
621             return UBiDiProps.INSTANCE.getMaxValue(which);
622         }
623     }
624 
625     private class CombiningClassIntProperty extends IntProperty {
CombiningClassIntProperty(int source)626         CombiningClassIntProperty(int source) {
627             super(source);
628         }
629         @Override
getMaxValue(int which)630         int getMaxValue(int which) {
631             return 0xff;
632         }
633     }
634 
635     private class NormQuickCheckIntProperty extends IntProperty {  // UCHAR_NF*_QUICK_CHECK properties
636         int which;
637         int max;
NormQuickCheckIntProperty(int source, int which, int max)638         NormQuickCheckIntProperty(int source, int which, int max) {
639             super(source);
640             this.which=which;
641             this.max=max;
642         }
643         @Override
getValue(int c)644         int getValue(int c) {
645             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c);
646         }
647         @Override
getMaxValue(int which)648         int getMaxValue(int which) {
649             return max;
650         }
651     }
652 
653     IntProperty intProps[]={
654         new BiDiIntProperty() {  // BIDI_CLASS
655             @Override
656             int getValue(int c) {
657                 return UBiDiProps.INSTANCE.getClass(c);
658             }
659         },
660         new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_),
661         new CombiningClassIntProperty(SRC_NFC) {  // CANONICAL_COMBINING_CLASS
662             @Override
663             int getValue(int c) {
664                 return Normalizer2.getNFDInstance().getCombiningClass(c);
665             }
666         },
667         new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0),
668         new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_),
669         new IntProperty(SRC_CHAR) {  // GENERAL_CATEGORY
670             @Override
671             int getValue(int c) {
672                 return getType(c);
673             }
674             @Override
675             int getMaxValue(int which) {
676                 return UCharacterCategory.CHAR_CATEGORY_COUNT-1;
677             }
678         },
679         new BiDiIntProperty() {  // JOINING_GROUP
680             @Override
681             int getValue(int c) {
682                 return UBiDiProps.INSTANCE.getJoiningGroup(c);
683             }
684         },
685         new BiDiIntProperty() {  // JOINING_TYPE
686             @Override
687             int getValue(int c) {
688                 return UBiDiProps.INSTANCE.getJoiningType(c);
689             }
690         },
691         new IntProperty(2, LB_MASK, LB_SHIFT),  // LINE_BREAK
692         new IntProperty(SRC_CHAR) {  // NUMERIC_TYPE
693             @Override
694             int getValue(int c) {
695                 return ntvGetType(getNumericTypeValue(getProperty(c)));
696             }
697             @Override
698             int getMaxValue(int which) {
699                 return NumericType.COUNT-1;
700             }
701         },
702         new IntProperty(SRC_PROPSVEC) {
703             @Override
704             int getValue(int c) {
705                 return UScript.getScript(c);
706             }
707             @Override
708             int getMaxValue(int which) {
709                 int scriptX=getMaxValues(0)&SCRIPT_X_MASK;
710                 return mergeScriptCodeOrIndex(scriptX);
711             }
712         },
713         new IntProperty(SRC_PROPSVEC) {  // HANGUL_SYLLABLE_TYPE
714             @Override
715             int getValue(int c) {
716                 /* see comments on gcbToHst[] above */
717                 int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
718                 if(gcb<gcbToHst.length) {
719                     return gcbToHst[gcb];
720                 } else {
721                     return HangulSyllableType.NOT_APPLICABLE;
722                 }
723             }
724             @Override
725             int getMaxValue(int which) {
726                 return HangulSyllableType.COUNT-1;
727             }
728         },
729         // max=1=YES -- these are never "maybe", only "no" or "yes"
730         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFD_QUICK_CHECK, 1),
731         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKD_QUICK_CHECK, 1),
732         // max=2=MAYBE
733         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2),
734         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2),
735         new CombiningClassIntProperty(SRC_NFC) {  // LEAD_CANONICAL_COMBINING_CLASS
736             @Override
737             int getValue(int c) {
738                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8;
739             }
740         },
741         new CombiningClassIntProperty(SRC_NFC) {  // TRAIL_CANONICAL_COMBINING_CLASS
742             @Override
743             int getValue(int c) {
744                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff;
745             }
746         },
747         new IntProperty(2, GCB_MASK, GCB_SHIFT),  // GRAPHEME_CLUSTER_BREAK
748         new IntProperty(2, SB_MASK, SB_SHIFT),  // SENTENCE_BREAK
749         new IntProperty(2, WB_MASK, WB_SHIFT),  // WORD_BREAK
750         new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
751             @Override
752             int getValue(int c) {
753                 return UBiDiProps.INSTANCE.getPairedBracketType(c);
754             }
755         },
756         new IntProperty(SRC_INPC) {
757             @Override
758             int getValue(int c) {
759                 CodePointTrie trie = LayoutProps.INSTANCE.inpcTrie;
760                 return trie != null ? trie.get(c) : 0;
761             }
762             @Override
763             int getMaxValue(int which) {
764                 return LayoutProps.INSTANCE.maxInpcValue;
765             }
766         },
767         new IntProperty(SRC_INSC) {
768             @Override
769             int getValue(int c) {
770                 CodePointTrie trie = LayoutProps.INSTANCE.inscTrie;
771                 return trie != null ? trie.get(c) : 0;
772             }
773             @Override
774             int getMaxValue(int which) {
775                 return LayoutProps.INSTANCE.maxInscValue;
776             }
777         },
778         new IntProperty(SRC_VO) {
779             @Override
780             int getValue(int c) {
781                 CodePointTrie trie = LayoutProps.INSTANCE.voTrie;
782                 return trie != null ? trie.get(c) : 0;
783             }
784             @Override
785             int getMaxValue(int which) {
786                 return LayoutProps.INSTANCE.maxVoValue;
787             }
788         },
789     };
790 
getIntPropertyValue(int c, int which)791     public int getIntPropertyValue(int c, int which) {
792         if(which<UProperty.INT_START) {
793             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
794                 return binProps[which].contains(c) ? 1 : 0;
795             }
796         } else if(which<UProperty.INT_LIMIT) {
797             return intProps[which-UProperty.INT_START].getValue(c);
798         } else if (which == UProperty.GENERAL_CATEGORY_MASK) {
799             return getMask(getType(c));
800         }
801         return 0; // undefined
802     }
803 
getIntPropertyMaxValue(int which)804     public int getIntPropertyMaxValue(int which) {
805         if(which<UProperty.INT_START) {
806             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
807                 return 1;  // maximum TRUE for all binary properties
808             }
809         } else if(which<UProperty.INT_LIMIT) {
810             return intProps[which-UProperty.INT_START].getMaxValue(which);
811         }
812         return -1; // undefined
813     }
814 
getSource(int which)815     final int getSource(int which) {
816         if(which<UProperty.BINARY_START) {
817             return SRC_NONE; /* undefined */
818         } else if(which<UProperty.BINARY_LIMIT) {
819             return binProps[which].getSource();
820         } else if(which<UProperty.INT_START) {
821             return SRC_NONE; /* undefined */
822         } else if(which<UProperty.INT_LIMIT) {
823             return intProps[which-UProperty.INT_START].getSource();
824         } else if(which<UProperty.STRING_START) {
825             switch(which) {
826             case UProperty.GENERAL_CATEGORY_MASK:
827             case UProperty.NUMERIC_VALUE:
828                 return SRC_CHAR;
829 
830             default:
831                 return SRC_NONE;
832             }
833         } else if(which<UProperty.STRING_LIMIT) {
834             switch(which) {
835             case UProperty.AGE:
836                 return SRC_PROPSVEC;
837 
838             case UProperty.BIDI_MIRRORING_GLYPH:
839                 return SRC_BIDI;
840 
841             case UProperty.CASE_FOLDING:
842             case UProperty.LOWERCASE_MAPPING:
843             case UProperty.SIMPLE_CASE_FOLDING:
844             case UProperty.SIMPLE_LOWERCASE_MAPPING:
845             case UProperty.SIMPLE_TITLECASE_MAPPING:
846             case UProperty.SIMPLE_UPPERCASE_MAPPING:
847             case UProperty.TITLECASE_MAPPING:
848             case UProperty.UPPERCASE_MAPPING:
849                 return SRC_CASE;
850 
851             case UProperty.ISO_COMMENT:
852             case UProperty.NAME:
853             case UProperty.UNICODE_1_NAME:
854                 return SRC_NAMES;
855 
856             default:
857                 return SRC_NONE;
858             }
859         } else {
860             switch(which) {
861             case UProperty.SCRIPT_EXTENSIONS:
862                 return SRC_PROPSVEC;
863             default:
864                 return SRC_NONE; /* undefined */
865             }
866         }
867     }
868 
869     /**
870      * <p>
871      * Unicode property names and property value names are compared
872      * "loosely". Property[Value]Aliases.txt say:
873      * <quote>
874      *   "With loose matching of property names, the case distinctions,
875      *    whitespace, and '_' are ignored."
876      * </quote>
877      * </p>
878      * <p>
879      * This function does just that, for ASCII (char *) name strings.
880      * It is almost identical to ucnv_compareNames() but also ignores
881      * ASCII White_Space characters (U+0009..U+000d).
882      * </p>
883      * @param name1 name to compare
884      * @param name2 name to compare
885      * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0
886      *         if name1 is greater than name2.
887      */
888     /* to be implemented in 2.4
889      * public static int comparePropertyNames(String name1, String name2)
890     {
891         int result = 0;
892         int i1 = 0;
893         int i2 = 0;
894         while (true) {
895             char ch1 = 0;
896             char ch2 = 0;
897             // Ignore delimiters '-', '_', and ASCII White_Space
898             if (i1 < name1.length()) {
899                 ch1 = name1.charAt(i1 ++);
900             }
901             while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t'
902                    || ch1 == '\n' // synwee what is || ch1 == '\v'
903                    || ch1 == '\f' || ch1=='\r') {
904                 if (i1 < name1.length()) {
905                     ch1 = name1.charAt(i1 ++);
906                 }
907                 else {
908                     ch1 = 0;
909                 }
910             }
911             if (i2 < name2.length()) {
912                 ch2 = name2.charAt(i2 ++);
913             }
914             while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t'
915                    || ch2 == '\n' // synwee what is || ch1 == '\v'
916                    || ch2 == '\f' || ch2=='\r') {
917                 if (i2 < name2.length()) {
918                     ch2 = name2.charAt(i2 ++);
919                 }
920                 else {
921                     ch2 = 0;
922                 }
923             }
924 
925             // If we reach the ends of both strings then they match
926             if (ch1 == 0 && ch2 == 0) {
927                 return 0;
928             }
929 
930             // Case-insensitive comparison
931             if (ch1 != ch2) {
932                 result = Character.toLowerCase(ch1)
933                                                 - Character.toLowerCase(ch2);
934                 if (result != 0) {
935                     return result;
936                 }
937             }
938         }
939     }
940     */
941 
942     /**
943      * Get the the maximum values for some enum/int properties.
944      * @return maximum values for the integer properties.
945      */
getMaxValues(int column)946     public int getMaxValues(int column)
947     {
948        // return m_maxBlockScriptValue_;
949 
950         switch(column) {
951         case 0:
952             return m_maxBlockScriptValue_;
953         case 2:
954             return m_maxJTGValue_;
955         default:
956             return 0;
957         }
958     }
959 
960     /**
961      * Gets the type mask
962      * @param type character type
963      * @return mask
964      */
getMask(int type)965     public static final int getMask(int type)
966     {
967         return 1 << type;
968     }
969 
970 
971     /**
972      * Returns the digit values of characters like 'A' - 'Z', normal,
973      * half-width and full-width. This method assumes that the other digit
974      * characters are checked by the calling method.
975      * @param ch character to test
976      * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
977      *         its corresponding digit will be returned.
978      */
getEuropeanDigit(int ch)979     public static int getEuropeanDigit(int ch) {
980         if ((ch > 0x7a && ch < 0xff21)
981             || ch < 0x41 || (ch > 0x5a && ch < 0x61)
982             || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
983             return -1;
984         }
985         if (ch <= 0x7a) {
986             // ch >= 0x41 or ch < 0x61
987             return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
988         }
989         // ch >= 0xff21
990         if (ch <= 0xff3a) {
991             return ch + 10 - 0xff21;
992         }
993         // ch >= 0xff41 && ch <= 0xff5a
994         return ch + 10 - 0xff41;
995     }
996 
digit(int c)997     public int digit(int c) {
998         int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
999         if(value<=9) {
1000             return value;
1001         } else {
1002             return -1;
1003         }
1004     }
1005 
getNumericValue(int c)1006     public int getNumericValue(int c) {
1007         // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
1008         int ntv = getNumericTypeValue(getProperty(c));
1009 
1010         if(ntv==NTV_NONE_) {
1011             return getEuropeanDigit(c);
1012         } else if(ntv<NTV_DIGIT_START_) {
1013             /* decimal digit */
1014             return ntv-NTV_DECIMAL_START_;
1015         } else if(ntv<NTV_NUMERIC_START_) {
1016             /* other digit */
1017             return ntv-NTV_DIGIT_START_;
1018         } else if(ntv<NTV_FRACTION_START_) {
1019             /* small integer */
1020             return ntv-NTV_NUMERIC_START_;
1021         } else if(ntv<NTV_LARGE_START_) {
1022             /* fraction */
1023             return -2;
1024         } else if(ntv<NTV_BASE60_START_) {
1025             /* large, single-significant-digit integer */
1026             int mant=(ntv>>5)-14;
1027             int exp=(ntv&0x1f)+2;
1028             if(exp<9 || (exp==9 && mant<=2)) {
1029                 int numValue=mant;
1030                 do {
1031                     numValue*=10;
1032                 } while(--exp>0);
1033                 return numValue;
1034             } else {
1035                 return -2;
1036             }
1037         } else if(ntv<NTV_FRACTION20_START_) {
1038             /* sexagesimal (base 60) integer */
1039             int numValue=(ntv>>2)-0xbf;
1040             int exp=(ntv&3)+1;
1041 
1042             switch(exp) {
1043             case 4:
1044                 numValue*=60*60*60*60;
1045                 break;
1046             case 3:
1047                 numValue*=60*60*60;
1048                 break;
1049             case 2:
1050                 numValue*=60*60;
1051                 break;
1052             case 1:
1053                 numValue*=60;
1054                 break;
1055             case 0:
1056             default:
1057                 break;
1058             }
1059 
1060             return numValue;
1061         } else if(ntv<NTV_RESERVED_START_) {
1062             // fraction-20 e.g. 3/80
1063             return -2;
1064         } else {
1065             /* reserved */
1066             return -2;
1067         }
1068     }
1069 
getUnicodeNumericValue(int c)1070     public double getUnicodeNumericValue(int c) {
1071         // equivalent to c version double u_getNumericValue(UChar32 c)
1072         int ntv = getNumericTypeValue(getProperty(c));
1073 
1074         if(ntv==NTV_NONE_) {
1075             return UCharacter.NO_NUMERIC_VALUE;
1076         } else if(ntv<NTV_DIGIT_START_) {
1077             /* decimal digit */
1078             return ntv-NTV_DECIMAL_START_;
1079         } else if(ntv<NTV_NUMERIC_START_) {
1080             /* other digit */
1081             return ntv-NTV_DIGIT_START_;
1082         } else if(ntv<NTV_FRACTION_START_) {
1083             /* small integer */
1084             return ntv-NTV_NUMERIC_START_;
1085         } else if(ntv<NTV_LARGE_START_) {
1086             /* fraction */
1087             int numerator=(ntv>>4)-12;
1088             int denominator=(ntv&0xf)+1;
1089             return (double)numerator/denominator;
1090         } else if(ntv<NTV_BASE60_START_) {
1091             /* large, single-significant-digit integer */
1092             double numValue;
1093             int mant=(ntv>>5)-14;
1094             int exp=(ntv&0x1f)+2;
1095             numValue=mant;
1096 
1097             /* multiply by 10^exp without math.h */
1098             while(exp>=4) {
1099                 numValue*=10000.;
1100                 exp-=4;
1101             }
1102             switch(exp) {
1103             case 3:
1104                 numValue*=1000.;
1105                 break;
1106             case 2:
1107                 numValue*=100.;
1108                 break;
1109             case 1:
1110                 numValue*=10.;
1111                 break;
1112             case 0:
1113             default:
1114                 break;
1115             }
1116 
1117             return numValue;
1118         } else if(ntv<NTV_FRACTION20_START_) {
1119             /* sexagesimal (base 60) integer */
1120             int numValue=(ntv>>2)-0xbf;
1121             int exp=(ntv&3)+1;
1122 
1123             switch(exp) {
1124             case 4:
1125                 numValue*=60*60*60*60;
1126                 break;
1127             case 3:
1128                 numValue*=60*60*60;
1129                 break;
1130             case 2:
1131                 numValue*=60*60;
1132                 break;
1133             case 1:
1134                 numValue*=60;
1135                 break;
1136             case 0:
1137             default:
1138                 break;
1139             }
1140 
1141             return numValue;
1142         } else if(ntv<NTV_FRACTION32_START_) {
1143             // fraction-20 e.g. 3/80
1144             int frac20=ntv-NTV_FRACTION20_START_;  // 0..0x17
1145             int numerator=2*(frac20&3)+1;
1146             int denominator=20<<(frac20>>2);
1147             return (double)numerator/denominator;
1148         } else if(ntv<NTV_RESERVED_START_) {
1149             // fraction-32 e.g. 3/64
1150             int frac32=ntv-NTV_FRACTION32_START_;  // 0..15
1151             int numerator=2*(frac32&3)+1;
1152             int denominator=32<<(frac32>>2);
1153             return (double)numerator/denominator;
1154         } else {
1155             /* reserved */
1156             return UCharacter.NO_NUMERIC_VALUE;
1157         }
1158     }
1159 
1160     // protected variables -----------------------------------------------
1161 
1162     /**
1163      * Extra property trie
1164      */
1165     Trie2_16 m_additionalTrie_;
1166     /**
1167      * Extra property vectors, 1st column for age and second for binary
1168      * properties.
1169      */
1170     int m_additionalVectors_[];
1171     /**
1172      * Number of additional columns
1173      */
1174     int m_additionalColumnsCount_;
1175     /**
1176      * Maximum values for block, bits used as in vector word
1177      * 0
1178      */
1179     int m_maxBlockScriptValue_;
1180     /**
1181      * Maximum values for the properties in vector word 2, bits used as in
1182      * vector word 2 (this is the value returned by getMaxValues(2))
1183      */
1184      int m_maxJTGValue_;
1185 
1186     /**
1187      * Script_Extensions data
1188      */
1189     public char[] m_scriptExtensions_;
1190 
1191     // private variables -------------------------------------------------
1192 
1193     /**
1194     * Default name of the datafile
1195     */
1196     private static final String DATA_FILE_NAME_ = "uprops.icu";
1197 
1198     // property data constants -------------------------------------------------
1199 
1200     /**
1201      * Numeric types and values in the main properties words.
1202      */
1203     private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
getNumericTypeValue(int props)1204     private static final int getNumericTypeValue(int props) {
1205         return props >> NUMERIC_TYPE_VALUE_SHIFT_;
1206     }
1207     /* constants for the storage form of numeric types and values */
1208     /** No numeric value. */
1209     private static final int NTV_NONE_ = 0;
1210     /** Decimal digits: nv=0..9 */
1211     private static final int NTV_DECIMAL_START_ = 1;
1212     /** Other digits: nv=0..9 */
1213     private static final int NTV_DIGIT_START_ = 11;
1214     /** Small integers: nv=0..154 */
1215     private static final int NTV_NUMERIC_START_ = 21;
1216     /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
1217     private static final int NTV_FRACTION_START_ = 0xb0;
1218     /**
1219      * Large integers:
1220      * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
1221      * (only one significant decimal digit)
1222      */
1223     private static final int NTV_LARGE_START_ = 0x1e0;
1224     /**
1225      * Sexagesimal numbers:
1226      * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
1227      */
1228     private static final int NTV_BASE60_START_=0x300;
1229     /**
1230      * Fraction-20 values:
1231      * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
1232      * numerator: num = 2*(frac20&3)+1
1233      * denominator: den = 20<<(frac20>>2)
1234      */
1235     private static final int NTV_FRACTION20_START_ = NTV_BASE60_START_ + 36;  // 0x300+9*4=0x324
1236     /**
1237      * Fraction-32 values:
1238      * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
1239      * numerator: num = 2*(frac32&3)+1
1240      * denominator: den = 32<<(frac32>>2)
1241      */
1242     private static final int NTV_FRACTION32_START_ = NTV_FRACTION20_START_ + 24;  // 0x324+6*4=0x34c
1243     /** No numeric value (yet). */
1244     private static final int NTV_RESERVED_START_ = NTV_FRACTION32_START_ + 16;  // 0x34c+4*4=0x35c
1245 
ntvGetType(int ntv)1246     private static final int ntvGetType(int ntv) {
1247         return
1248             (ntv==NTV_NONE_) ? NumericType.NONE :
1249             (ntv<NTV_DIGIT_START_) ?  NumericType.DECIMAL :
1250             (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
1251             NumericType.NUMERIC;
1252     }
1253 
1254     /*
1255      * Properties in vector word 0
1256      * Bits
1257      * 31..24   DerivedAge version major/minor one nibble each
1258      * 23..22   3..1: Bits 21..20 & 7..0 = Script_Extensions index
1259      *             3: Script value from Script_Extensions
1260      *             2: Script=Inherited
1261      *             1: Script=Common
1262      *             0: Script=bits 21..20 & 7..0
1263      * 21..20   Bits 9..8 of the UScriptCode, or index to Script_Extensions
1264      * 19..17   East Asian Width
1265      * 16.. 8   UBlockCode
1266      *  7.. 0   UScriptCode, or index to Script_Extensions
1267      */
1268 
1269     /**
1270      * Script_Extensions: mask includes Script
1271      */
1272     public static final int SCRIPT_X_MASK = 0x00f000ff;
1273     //private static final int SCRIPT_X_SHIFT = 22;
1274 
1275     // The UScriptCode or Script_Extensions index is split across two bit fields.
1276     // (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
1277     // Shift the high bits right by 12 to assemble the full value.
1278     public static final int SCRIPT_HIGH_MASK = 0x00300000;
1279     public static final int SCRIPT_HIGH_SHIFT = 12;
1280     public static final int MAX_SCRIPT = 0x3ff;
1281 
1282     /**
1283      * Integer properties mask and shift values for East Asian cell width.
1284      * Equivalent to icu4c UPROPS_EA_MASK
1285      */
1286     private static final int EAST_ASIAN_MASK_ = 0x000e0000;
1287     /**
1288      * Integer properties mask and shift values for East Asian cell width.
1289      * Equivalent to icu4c UPROPS_EA_SHIFT
1290      */
1291     private static final int EAST_ASIAN_SHIFT_ = 17;
1292     /**
1293      * Integer properties mask and shift values for blocks.
1294      * Equivalent to icu4c UPROPS_BLOCK_MASK
1295      */
1296     private static final int BLOCK_MASK_ = 0x0001ff00;
1297     /**
1298      * Integer properties mask and shift values for blocks.
1299      * Equivalent to icu4c UPROPS_BLOCK_SHIFT
1300      */
1301     private static final int BLOCK_SHIFT_ = 8;
1302     /**
1303      * Integer properties mask for the low bits (7..0) of the script code.
1304      * Equivalent to icu4c UPROPS_SCRIPT_LOW_MASK.
1305      */
1306     public static final int SCRIPT_LOW_MASK = 0x000000ff;
1307 
1308     /* SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
1309     public static final int SCRIPT_X_WITH_COMMON = 0x400000;
1310     public static final int SCRIPT_X_WITH_INHERITED = 0x800000;
1311     public static final int SCRIPT_X_WITH_OTHER = 0xc00000;
1312 
mergeScriptCodeOrIndex(int scriptX)1313     public static final int mergeScriptCodeOrIndex(int scriptX) {
1314         return
1315             ((scriptX & SCRIPT_HIGH_MASK) >> SCRIPT_HIGH_SHIFT) |
1316             (scriptX & SCRIPT_LOW_MASK);
1317     }
1318 
1319     /**
1320      * Additional properties used in internal trie data
1321      */
1322     /*
1323      * Properties in vector word 1
1324      * Each bit encodes one binary property.
1325      * The following constants represent the bit number, use 1<<UPROPS_XYZ.
1326      * UPROPS_BINARY_1_TOP<=32!
1327      *
1328      * Keep this list of property enums in sync with
1329      * propListNames[] in icu/source/tools/genprops/props2.c!
1330      *
1331      * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
1332      */
1333     private static final int WHITE_SPACE_PROPERTY_ = 0;
1334     private static final int DASH_PROPERTY_ = 1;
1335     private static final int HYPHEN_PROPERTY_ = 2;
1336     private static final int QUOTATION_MARK_PROPERTY_ = 3;
1337     private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
1338     private static final int MATH_PROPERTY_ = 5;
1339     private static final int HEX_DIGIT_PROPERTY_ = 6;
1340     private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
1341     private static final int ALPHABETIC_PROPERTY_ = 8;
1342     private static final int IDEOGRAPHIC_PROPERTY_ = 9;
1343     private static final int DIACRITIC_PROPERTY_ = 10;
1344     private static final int EXTENDER_PROPERTY_ = 11;
1345     private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
1346     private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
1347     private static final int GRAPHEME_LINK_PROPERTY_ = 14;
1348     private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
1349     private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
1350     private static final int RADICAL_PROPERTY_ = 17;
1351     private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
1352     private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
1353     private static final int DEPRECATED_PROPERTY_ = 20;
1354     private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
1355     private static final int XID_START_PROPERTY_ = 22;
1356     private static final int XID_CONTINUE_PROPERTY_ = 23;
1357     private static final int ID_START_PROPERTY_    = 24;
1358     private static final int ID_CONTINUE_PROPERTY_ = 25;
1359     private static final int GRAPHEME_BASE_PROPERTY_ = 26;
1360     private static final int S_TERM_PROPERTY_ = 27;
1361     private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
1362     private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
1363     private static final int PATTERN_WHITE_SPACE = 30;
1364     private static final int PREPENDED_CONCATENATION_MARK = 31;     // new in ICU 60 and Unicode 10
1365 
1366     /*
1367      * Properties in vector word 2
1368      * Bits
1369      * 31..26   http://www.unicode.org/reports/tr51/#Emoji_Properties
1370      * 25..20   Line Break
1371      * 19..15   Sentence Break
1372      * 14..10   Word Break
1373      *  9.. 5   Grapheme Cluster Break
1374      *  4.. 0   Decomposition Type
1375      */
1376     private static final int PROPS_2_EXTENDED_PICTOGRAPHIC=26;
1377     private static final int PROPS_2_EMOJI_COMPONENT = 27;
1378     private static final int PROPS_2_EMOJI = 28;
1379     private static final int PROPS_2_EMOJI_PRESENTATION = 29;
1380     private static final int PROPS_2_EMOJI_MODIFIER = 30;
1381     private static final int PROPS_2_EMOJI_MODIFIER_BASE = 31;
1382 
1383     private static final int LB_MASK          = 0x03f00000;
1384     private static final int LB_SHIFT         = 20;
1385 
1386     private static final int SB_MASK          = 0x000f8000;
1387     private static final int SB_SHIFT         = 15;
1388 
1389     private static final int WB_MASK          = 0x00007c00;
1390     private static final int WB_SHIFT         = 10;
1391 
1392     private static final int GCB_MASK         = 0x000003e0;
1393     private static final int GCB_SHIFT        = 5;
1394 
1395     /**
1396      * Integer properties mask for decomposition type.
1397      * Equivalent to icu4c UPROPS_DT_MASK.
1398      */
1399     private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
1400 
1401     /**
1402      * First nibble shift
1403      */
1404     private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
1405     /**
1406      * Second nibble mask
1407      */
1408     private static final int LAST_NIBBLE_MASK_ = 0xF;
1409     /**
1410      * Age value shift
1411      */
1412     private static final int AGE_SHIFT_ = 24;
1413 
1414 
1415     // private constructors --------------------------------------------------
1416 
1417     /**
1418      * Constructor
1419      * @exception IOException thrown when data reading fails or data corrupted
1420      */
UCharacterProperty()1421     private UCharacterProperty() throws IOException
1422     {
1423         // consistency check
1424         if(binProps.length!=UProperty.BINARY_LIMIT) {
1425             throw new ICUException("binProps.length!=UProperty.BINARY_LIMIT");
1426         }
1427         if(intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)) {
1428             throw new ICUException("intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)");
1429         }
1430 
1431         // jar access
1432         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
1433         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
1434         // Read or skip the 16 indexes.
1435         int propertyOffset = bytes.getInt();
1436         /* exceptionOffset = */ bytes.getInt();
1437         /* caseOffset = */ bytes.getInt();
1438         int additionalOffset = bytes.getInt();
1439         int additionalVectorsOffset = bytes.getInt();
1440         m_additionalColumnsCount_ = bytes.getInt();
1441         int scriptExtensionsOffset = bytes.getInt();
1442         int reservedOffset7 = bytes.getInt();
1443         /* reservedOffset8 = */ bytes.getInt();
1444         /* dataTopOffset = */ bytes.getInt();
1445         m_maxBlockScriptValue_ = bytes.getInt();
1446         m_maxJTGValue_ = bytes.getInt();
1447         ICUBinary.skipBytes(bytes, (16 - 12) << 2);
1448 
1449         // read the main properties trie
1450         m_trie_ = Trie2_16.createFromSerialized(bytes);
1451         int expectedTrieLength = (propertyOffset - 16) * 4;
1452         int trieLength = m_trie_.getSerializedLength();
1453         if(trieLength > expectedTrieLength) {
1454             throw new IOException("uprops.icu: not enough bytes for main trie");
1455         }
1456         // skip padding after trie bytes
1457         ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1458 
1459         // skip unused intervening data structures
1460         ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
1461 
1462         if(m_additionalColumnsCount_ > 0) {
1463             // reads the additional property block
1464             m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
1465             expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
1466             trieLength = m_additionalTrie_.getSerializedLength();
1467             if(trieLength > expectedTrieLength) {
1468                 throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
1469             }
1470             // skip padding after trie bytes
1471             ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1472 
1473             // additional properties
1474             int size = scriptExtensionsOffset - additionalVectorsOffset;
1475             m_additionalVectors_ = ICUBinary.getInts(bytes, size, 0);
1476         }
1477 
1478         // Script_Extensions
1479         int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
1480         if(numChars > 0) {
1481             m_scriptExtensions_ = ICUBinary.getChars(bytes, numChars, 0);
1482         }
1483     }
1484 
1485     private static final class IsAcceptable implements ICUBinary.Authenticate {
1486         @Override
isDataVersionAcceptable(byte version[])1487         public boolean isDataVersionAcceptable(byte version[]) {
1488             return version[0] == 7;
1489         }
1490     }
1491     private static final int DATA_FORMAT = 0x5550726F;  // "UPro"
1492 
1493     // private methods -------------------------------------------------------
1494 
1495     /*
1496      * Compare additional properties to see if it has argument type
1497      * @param property 32 bit properties
1498      * @param type character type
1499      * @return true if property has type
1500      */
1501     /*private boolean compareAdditionalType(int property, int type)
1502     {
1503         return (property & (1 << type)) != 0;
1504     }*/
1505 
1506     // property starts for UnicodeSet -------------------------------------- ***
1507 
// Named code points used by addPropertyStarts() for the hardcoded property boundaries.
1508     private static final int TAB     = 0x0009;  // U+0009 CHARACTER TABULATION
1509     //private static final int LF      = 0x000a;
1510     //private static final int FF      = 0x000c;
1511     private static final int CR      = 0x000d;  // U+000D CARRIAGE RETURN
1512     private static final int U_A     = 0x0041;  // U+0041 LATIN CAPITAL LETTER A
1513     private static final int U_F     = 0x0046;  // U+0046 LATIN CAPITAL LETTER F
1514     private static final int U_Z     = 0x005a;  // U+005A LATIN CAPITAL LETTER Z
1515     private static final int U_a     = 0x0061;  // U+0061 LATIN SMALL LETTER A
1516     private static final int U_f     = 0x0066;  // U+0066 LATIN SMALL LETTER F
1517     private static final int U_z     = 0x007a;  // U+007A LATIN SMALL LETTER Z
1518     private static final int DEL     = 0x007f;  // U+007F DELETE
1519     private static final int NL      = 0x0085;  // U+0085 NEXT LINE
1520     private static final int NBSP    = 0x00a0;  // U+00A0 NO-BREAK SPACE
1521     private static final int CGJ     = 0x034f;  // U+034F COMBINING GRAPHEME JOINER
1522     private static final int FIGURESP= 0x2007;  // U+2007 FIGURE SPACE
1523     private static final int HAIRSP  = 0x200a;  // U+200A HAIR SPACE
1524     //private static final int ZWNJ    = 0x200c;
1525     //private static final int ZWJ     = 0x200d;
1526     private static final int RLM     = 0x200f;  // U+200F RIGHT-TO-LEFT MARK
1527     private static final int NNBSP   = 0x202f;  // U+202F NARROW NO-BREAK SPACE
1528     private static final int WJ      = 0x2060;  // U+2060 WORD JOINER
1529     private static final int INHSWAP = 0x206a;  // U+206A INHIBIT SYMMETRIC SWAPPING
1530     private static final int NOMDIG  = 0x206f;  // U+206F NOMINAL DIGIT SHAPES
1531     private static final int U_FW_A  = 0xff21;  // U+FF21 FULLWIDTH LATIN CAPITAL LETTER A
1532     private static final int U_FW_F  = 0xff26;  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
1533     private static final int U_FW_Z  = 0xff3a;  // U+FF3A FULLWIDTH LATIN CAPITAL LETTER Z
1534     private static final int U_FW_a  = 0xff41;  // U+FF41 FULLWIDTH LATIN SMALL LETTER A
1535     private static final int U_FW_f  = 0xff46;  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
1536     private static final int U_FW_z  = 0xff5a;  // U+FF5A FULLWIDTH LATIN SMALL LETTER Z
1537     private static final int ZWNBSP  = 0xfeff;  // U+FEFF ZERO WIDTH NO-BREAK SPACE
1538 
addPropertyStarts(UnicodeSet set)1539     public UnicodeSet addPropertyStarts(UnicodeSet set) {
1540         /* add the start code point of each same-value range of the main trie */
1541         Iterator<Trie2.Range> trieIterator = m_trie_.iterator();
1542         Trie2.Range range;
1543         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1544             set.add(range.startCodePoint);
1545         }
1546 
1547         /* add code points with hardcoded properties, plus the ones following them */
1548 
1549         /* add for u_isblank() */
1550         set.add(TAB);
1551         set.add(TAB+1);
1552 
1553         /* add for IS_THAT_CONTROL_SPACE() */
1554         set.add(CR+1); /* range TAB..CR */
1555         set.add(0x1c);
1556         set.add(0x1f+1);
1557         set.add(NL);
1558         set.add(NL+1);
1559 
1560         /* add for u_isIDIgnorable() what was not added above */
1561         set.add(DEL); /* range DEL..NBSP-1, NBSP added below */
1562         set.add(HAIRSP);
1563         set.add(RLM+1);
1564         set.add(INHSWAP);
1565         set.add(NOMDIG+1);
1566         set.add(ZWNBSP);
1567         set.add(ZWNBSP+1);
1568 
1569         /* add no-break spaces for u_isWhitespace() what was not added above */
1570         set.add(NBSP);
1571         set.add(NBSP+1);
1572         set.add(FIGURESP);
1573         set.add(FIGURESP+1);
1574         set.add(NNBSP);
1575         set.add(NNBSP+1);
1576 
1577         /* add for u_charDigitValue() */
1578         // TODO remove when UCharacter.getHanNumericValue() is changed to just return
1579         // Unicode numeric values
1580         set.add(0x3007);
1581         set.add(0x3008);
1582         set.add(0x4e00);
1583         set.add(0x4e01);
1584         set.add(0x4e8c);
1585         set.add(0x4e8d);
1586         set.add(0x4e09);
1587         set.add(0x4e0a);
1588         set.add(0x56db);
1589         set.add(0x56dc);
1590         set.add(0x4e94);
1591         set.add(0x4e95);
1592         set.add(0x516d);
1593         set.add(0x516e);
1594         set.add(0x4e03);
1595         set.add(0x4e04);
1596         set.add(0x516b);
1597         set.add(0x516c);
1598         set.add(0x4e5d);
1599         set.add(0x4e5e);
1600 
1601         /* add for u_digit() */
1602         set.add(U_a);
1603         set.add(U_z+1);
1604         set.add(U_A);
1605         set.add(U_Z+1);
1606         set.add(U_FW_a);
1607         set.add(U_FW_z+1);
1608         set.add(U_FW_A);
1609         set.add(U_FW_Z+1);
1610 
1611         /* add for u_isxdigit() */
1612         set.add(U_f+1);
1613         set.add(U_F+1);
1614         set.add(U_FW_f+1);
1615         set.add(U_FW_F+1);
1616 
1617         /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
1618         set.add(WJ); /* range WJ..NOMDIG */
1619         set.add(0xfff0);
1620         set.add(0xfffb+1);
1621         set.add(0xe0000);
1622         set.add(0xe0fff+1);
1623 
1624         /* add for UCHAR_GRAPHEME_BASE and others */
1625         set.add(CGJ);
1626         set.add(CGJ+1);
1627 
1628         return set; // for chaining
1629     }
1630 
upropsvec_addPropertyStarts(UnicodeSet set)1631     public void upropsvec_addPropertyStarts(UnicodeSet set) {
1632         /* add the start code point of each same-value range of the properties vectors trie */
1633         if(m_additionalColumnsCount_>0) {
1634             /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
1635             Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
1636             Trie2.Range range;
1637             while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1638                 set.add(range.startCodePoint);
1639             }
1640         }
1641     }
1642 
ulayout_addPropertyStarts(int src, UnicodeSet set)1643     static UnicodeSet ulayout_addPropertyStarts(int src, UnicodeSet set) {
1644         return LayoutProps.INSTANCE.addPropertyStarts(src, set);
1645     }
1646 
// This static initializer block must be placed after
// other static member initialization.
//
// Creates the singleton. An IOException from the constructor is rethrown as an
// unchecked MissingResourceException carrying the same message.
static {
    try {
        INSTANCE = new UCharacterProperty();
    }
    catch (IOException e) {
        // MissingResourceException has no constructor that accepts a cause, so
        // chain the IOException explicitly; otherwise its stack trace is lost.
        MissingResourceException failure = new MissingResourceException(e.getMessage(),"","");
        failure.initCause(e);
        throw failure;
    }
}
1657 
1658 /*----------------------------------------------------------------
1659  * Inclusions list
1660  *----------------------------------------------------------------*/
1661 
1662     /*
1663      * Return a set of characters for property enumeration.
1664      * The set implicitly contains 0x110000 as well, which is one more than the highest
1665      * Unicode code point.
1666      *
1667      * This set is used as an ordered list - its code points are ordered, and
1668      * consecutive code points (in Unicode code point order) in the set define a range.
1669      * For each two consecutive characters (start, limit) in the set,
1670      * all of the UCD/normalization and related properties for
1671      * all code points start..limit-1 are all the same,
1672      * except for character names and ISO comments.
1673      *
1674      * All Unicode code points U+0000..U+10ffff are covered by these ranges.
1675      * The ranges define a partition of the Unicode code space.
1676      * ICU uses the inclusions set to enumerate properties for generating
1677      * UnicodeSets containing all code points that have a certain property value.
1678      *
1679      * The Inclusion List is generated from the UCD. It is generated
1680      * by enumerating the data tries, and code points for hardcoded properties
1681      * are added as well.
1682      *
1683      * --------------------------------------------------------------------------
1684      *
1685      * The following are ideas for getting properties-unique code point ranges,
1686      * with possible optimizations beyond the current implementation.
1687      * These optimizations would require more code and be more fragile.
1688      * The current implementation generates one single list (set) for all properties.
1689      *
1690      * To enumerate properties efficiently, one needs to know ranges of
1691      * repetitive values, so that the value of only each start code point
1692      * can be applied to the whole range.
1693      * This information is in principle available in the uprops.icu/unorm.icu data.
1694      *
1695      * There are two obstacles:
1696      *
1697      * 1. Some properties are computed from multiple data structures,
1698      *    making it necessary to get repetitive ranges by intersecting
1699      *    ranges from multiple tries.
1700      *
1701      * 2. It is not economical to write code for getting repetitive ranges
1702      *    that are precise for each of some 50 properties.
1703      *
1704      * Compromise ideas:
1705      *
1706      * - Get ranges per trie, not per individual property.
1707      *   Each range contains the same values for a whole group of properties.
1708      *   This would generate currently five range sets, two for uprops.icu tries
1709      *   and three for unorm.icu tries.
1710      *
1711      * - Combine sets of ranges for multiple tries to get sufficient sets
1712      *   for properties, e.g., the uprops.icu main and auxiliary tries
1713      *   for all non-normalization properties.
1714      *
1715      * Ideas for representing ranges and combining them:
1716      *
1717      * - A UnicodeSet could hold just the start code points of ranges.
1718      *   Multiple sets are easily combined by or-ing them together.
1719      *
1720      * - Alternatively, a UnicodeSet could hold each even-numbered range.
1721      *   All ranges could be enumerated by using each start code point
1722      *   (for the even-numbered ranges) as well as each limit (end+1) code point
1723      *   (for the odd-numbered ranges).
1724      *   It should be possible to combine two such sets by xor-ing them,
1725      *   but no more than two.
1726      *
1727      * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
1728      * but the first one is certainly simpler and applicable for combining more than
1729      * two range sets.
1730      *
1731      * It is possible to combine all range sets for all uprops/unorm tries into one
1732      * set that can be used for all properties.
1733      * As an optimization, there could be less-combined range sets for certain
1734      * groups of properties.
1735      * The relationship of which less-combined range set to use for which property
1736      * depends on the implementation of the properties and must be hardcoded
1737      * - somewhat error-prone and higher maintenance but can be tested easily
1738      * by building property sets "the simple way" in test code.
1739      *
1740      * ---
1741      *
1742      * Do not use a UnicodeSet pattern because that causes infinite recursion;
1743      * UnicodeSet depends on the inclusions set.
1744      *
1745      * ---
1746      *
1747      * getInclusions() is commented out starting 2005-feb-12 because
1748      * UnicodeSet now calls the uxyz_addPropertyStarts() directly,
1749      * and only for the relevant property source.
1750      */
1751     /*
1752     public UnicodeSet getInclusions() {
1753         UnicodeSet set = new UnicodeSet();
1754         NormalizerImpl.addPropertyStarts(set);
1755         addPropertyStarts(set);
1756         return set;
1757     }
1758     */
1759 }
1760