• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 import java.util.Iterator;
15 import java.util.MissingResourceException;
16 
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.lang.UCharacter.HangulSyllableType;
19 import com.ibm.icu.lang.UCharacter.NumericType;
20 import com.ibm.icu.lang.UCharacterCategory;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.lang.UScript;
23 import com.ibm.icu.text.Normalizer2;
24 import com.ibm.icu.text.UTF16;
25 import com.ibm.icu.text.UnicodeSet;
26 import com.ibm.icu.util.CodePointMap;
27 import com.ibm.icu.util.CodePointTrie;
28 import com.ibm.icu.util.ICUException;
29 import com.ibm.icu.util.ICUUncheckedIOException;
30 import com.ibm.icu.util.VersionInfo;
31 
32 /**
33 * <p>Internal class used for Unicode character property database.</p>
34 * <p>This class stores binary data read from uprops.icu.
35 * It does not have the capability to parse the data into more high-level
36 * information. It only returns bytes of information when required.</p>
37 * <p>Due to the form most commonly used for retrieval, array of char is used
38 * to store the binary data.</p>
39 * <p>UCharacterPropertyDB also contains information on accessing indexes to
40 * significant points in the binary data.</p>
41 * <p>Responsibility for molding the binary data into a more meaningful form lies on
42 * <a href=UCharacter.html>UCharacter</a>.</p>
43 * @author Syn Wee Quek
44 * @since release 2.1, february 1st 2002
45 */
46 
47 public final class UCharacterProperty
48 {
49     // public data members -----------------------------------------------
50 
51     /*
52      * public singleton instance
53      */
54     public static final UCharacterProperty INSTANCE;
55 
56     /**
57     * Trie data
58     */
59     public Trie2_16 m_trie_;
60     /**
61     * Unicode version
62     */
63     public VersionInfo m_unicodeVersion_;
64     /**
65     * Latin capital letter i with dot above
66     */
67     public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130;
68     /**
69     * Latin small letter i with dot above
70     */
71     public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131;
72     /**
73     * Latin lowercase i
74     */
75     public static final char LATIN_SMALL_LETTER_I_ = 0x69;
76     /**
77     * Character type mask
78     */
79     public static final int TYPE_MASK = 0x1F;
80 
81     // uprops.h enum UPropertySource --------------------------------------- ***
82 
83     /** No source, not a supported property. */
84     public static final int SRC_NONE=0;
85     /** From uchar.c/uprops.icu main trie */
86     public static final int SRC_CHAR=1;
87     /** From uchar.c/uprops.icu properties vectors trie */
88     public static final int SRC_PROPSVEC=2;
89     /** From unames.c/unames.icu */
90     public static final int SRC_NAMES=3;
91     /** From ucase.c/ucase.icu */
92     public static final int SRC_CASE=4;
93     /** From ubidi_props.c/ubidi.icu */
94     public static final int SRC_BIDI=5;
95     /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
96     public static final int SRC_CHAR_AND_PROPSVEC=6;
97     /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
98     public static final int SRC_CASE_AND_NORM=7;
99     /** From normalizer2impl.cpp/nfc.nrm */
100     public static final int SRC_NFC=8;
101     /** From normalizer2impl.cpp/nfkc.nrm */
102     public static final int SRC_NFKC=9;
103     /** From normalizer2impl.cpp/nfkc_cf.nrm */
104     public static final int SRC_NFKC_CF=10;
105     /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
106     public static final int SRC_NFC_CANON_ITER=11;
107     // Text layout properties.
108     public static final int SRC_INPC=12;
109     public static final int SRC_INSC=13;
110     public static final int SRC_VO=14;
111     public static final int SRC_EMOJI=15;
112     /** One more than the highest UPropertySource (SRC_) constant. */
113     public static final int SRC_COUNT=16;
114 
115     private static final class LayoutProps {
116         private static final class IsAcceptable implements ICUBinary.Authenticate {
117             @Override
isDataVersionAcceptable(byte version[])118             public boolean isDataVersionAcceptable(byte version[]) {
119                 return version[0] == 1;
120             }
121         }
122         private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
123         private static final int DATA_FORMAT = 0x4c61796f;  // "Layo"
124 
125         // indexes into indexes[]
126         // Element 0 stores the length of the indexes[] array.
127         //ivate static final int IX_INDEXES_LENGTH = 0;
128         // Elements 1..7 store the tops of consecutive code point tries.
129         // No trie is stored if the difference between two of these is less than 16.
130         private static final int IX_INPC_TRIE_TOP = 1;
131         private static final int IX_INSC_TRIE_TOP = 2;
132         private static final int IX_VO_TRIE_TOP = 3;
133         //ivate static final int IX_RESERVED_TOP = 4;
134 
135         //ivate static final int IX_TRIES_TOP = 7;
136 
137         private static final int IX_MAX_VALUES = 9;
138 
139         // Length of indexes[]. Multiple of 4 to 16-align the tries.
140         //ivate static final int IX_COUNT = 12;
141 
142         private static final int MAX_INPC_SHIFT = 24;
143         private static final int MAX_INSC_SHIFT = 16;
144         private static final int MAX_VO_SHIFT = 8;
145 
146         static final LayoutProps INSTANCE = new LayoutProps();
147 
148         CodePointTrie inpcTrie = null;  // Indic_Positional_Category
149         CodePointTrie inscTrie = null;  // Indic_Syllabic_Category
150         CodePointTrie voTrie = null;  // Vertical_Orientation
151 
152         int maxInpcValue = 0;
153         int maxInscValue = 0;
154         int maxVoValue = 0;
155 
LayoutProps()156         LayoutProps() {
157             ByteBuffer bytes = ICUBinary.getRequiredData("ulayout.icu");
158             try {
159                 ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
160                 int startPos = bytes.position();
161                 int indexesLength = bytes.getInt();  // inIndexes[IX_INDEXES_LENGTH]
162                 if (indexesLength < 12) {
163                     throw new ICUUncheckedIOException(
164                             "Text layout properties data: not enough indexes");
165                 }
166                 int[] inIndexes = new int[indexesLength];
167                 inIndexes[0] = indexesLength;
168                 for (int i = 1; i < indexesLength; ++i) {
169                     inIndexes[i] = bytes.getInt();
170                 }
171 
172                 int offset = indexesLength * 4;
173                 int top = inIndexes[IX_INPC_TRIE_TOP];
174                 int trieSize = top - offset;
175                 if (trieSize >= 16) {
176                     inpcTrie = CodePointTrie.fromBinary(null, null, bytes);
177                 }
178                 int pos = bytes.position() - startPos;
179                 assert top >= pos;
180                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
181                 offset = top;
182                 top = inIndexes[IX_INSC_TRIE_TOP];
183                 trieSize = top - offset;
184                 if (trieSize >= 16) {
185                     inscTrie = CodePointTrie.fromBinary(null, null, bytes);
186                 }
187                 pos = bytes.position() - startPos;
188                 assert top >= pos;
189                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
190                 offset = top;
191                 top = inIndexes[IX_VO_TRIE_TOP];
192                 trieSize = top - offset;
193                 if (trieSize >= 16) {
194                     voTrie = CodePointTrie.fromBinary(null, null, bytes);
195                 }
196                 pos = bytes.position() - startPos;
197                 assert top >= pos;
198                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
199 
200                 int maxValues = inIndexes[IX_MAX_VALUES];
201                 maxInpcValue = maxValues >>> MAX_INPC_SHIFT;
202                 maxInscValue = (maxValues >> MAX_INSC_SHIFT) & 0xff;
203                 maxVoValue = (maxValues >> MAX_VO_SHIFT) & 0xff;
204             } catch(IOException e) {
205                 throw new ICUUncheckedIOException(e);
206             }
207         }
208 
addPropertyStarts(int src, UnicodeSet set)209         public UnicodeSet addPropertyStarts(int src, UnicodeSet set) {
210             CodePointTrie trie;
211             switch (src) {
212             case SRC_INPC:
213                 trie = inpcTrie;
214                 break;
215             case SRC_INSC:
216                 trie = inscTrie;
217                 break;
218             case SRC_VO:
219                 trie = voTrie;
220                 break;
221             default:
222                 throw new IllegalStateException();
223             }
224 
225             if (trie == null) {
226                 throw new MissingResourceException(
227                         "no data for one of the text layout properties; src=" + src,
228                         "LayoutProps", "");
229             }
230 
231             // Add the start code point of each same-value range of the trie.
232             CodePointMap.Range range = new CodePointMap.Range();
233             int start = 0;
234             while (trie.getRange(start, null, range)) {
235                 set.add(start);
236                 start = range.getEnd() + 1;
237             }
238             return set;
239         }
240     }
241 
242     // public methods ----------------------------------------------------
243 
244     /**
245     * Gets the main property value for code point ch.
246     * @param ch code point whose property value is to be retrieved
247     * @return property value of code point
248     */
getProperty(int ch)249     public final int getProperty(int ch)
250     {
251         return m_trie_.get(ch);
252     }
253 
254     /**
255      * Gets the unicode additional properties.
256      * Java version of C u_getUnicodeProperties().
257      * @param codepoint codepoint whose additional properties is to be
258      *                  retrieved
259      * @param column The column index.
260      * @return unicode properties
261      */
getAdditional(int codepoint, int column)262     public int getAdditional(int codepoint, int column) {
263         assert column >= 0;
264         if (column >= m_additionalColumnsCount_) {
265             return 0;
266         }
267         return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
268     }
269 
270     static final int MY_MASK = UCharacterProperty.TYPE_MASK
271         & ((1<<UCharacterCategory.UPPERCASE_LETTER) |
272             (1<<UCharacterCategory.LOWERCASE_LETTER) |
273             (1<<UCharacterCategory.TITLECASE_LETTER) |
274             (1<<UCharacterCategory.MODIFIER_LETTER) |
275             (1<<UCharacterCategory.OTHER_LETTER));
276 
277 
278        /**
279      * <p>Get the "age" of the code point.</p>
280      * <p>The "age" is the Unicode version when the code point was first
281      * designated (as a non-character or for Private Use) or assigned a
282      * character.</p>
283      * <p>This can be useful to avoid emitting code points to receiving
284      * processes that do not accept newer characters.</p>
285      * <p>The data is from the UCD file DerivedAge.txt.</p>
286      * <p>This API does not check the validity of the codepoint.</p>
287      * @param codepoint The code point.
288      * @return the Unicode version number
289      */
getAge(int codepoint)290     public VersionInfo getAge(int codepoint)
291     {
292         int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
293         return VersionInfo.getInstance(
294                            (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
295                            version & LAST_NIBBLE_MASK_, 0, 0);
296     }
297 
298     private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
299     private static final int GC_CC_MASK = getMask(UCharacter.CONTROL);
300     private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE);
301     private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR);
302     private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR);
303     private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR);
304     /** Mask constant for multiple UCharCategory bits (Z Separators). */
305     private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK;
306 
307     /**
308      * Checks if c is in
309      * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
310      * with space=\p{Whitespace} and Control=Cc.
311      * Implements UCHAR_POSIX_GRAPH.
312      * @internal
313      */
isgraphPOSIX(int c)314     private static final boolean isgraphPOSIX(int c) {
315         /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
316         /* comparing ==0 returns FALSE for the categories mentioned */
317         return (getMask(UCharacter.getType(c))&
318                 (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK))
319                ==0;
320     }
321 
322     // binary properties --------------------------------------------------- ***
323 
324     private class BinaryProperty {
325         int column;  // SRC_PROPSVEC column, or "source" if mask==0
326         int mask;
BinaryProperty(int column, int mask)327         BinaryProperty(int column, int mask) {
328             this.column=column;
329             this.mask=mask;
330         }
BinaryProperty(int source)331         BinaryProperty(int source) {
332             this.column=source;
333             this.mask=0;
334         }
getSource()335         final int getSource() {
336             return mask==0 ? column : SRC_PROPSVEC;
337         }
contains(int c)338         boolean contains(int c) {
339             // systematic, directly stored properties
340             return (getAdditional(c, column)&mask)!=0;
341         }
342     }
343 
344     private class CaseBinaryProperty extends BinaryProperty {  // case mapping properties
345         int which;
CaseBinaryProperty(int which)346         CaseBinaryProperty(int which) {
347             super(SRC_CASE);
348             this.which=which;
349         }
350         @Override
contains(int c)351         boolean contains(int c) {
352             return UCaseProps.INSTANCE.hasBinaryProperty(c, which);
353         }
354     }
355 
356     private class EmojiBinaryProperty extends BinaryProperty {
357         int which;
EmojiBinaryProperty(int which)358         EmojiBinaryProperty(int which) {
359             super(SRC_EMOJI);
360             this.which=which;
361         }
362         @Override
contains(int c)363         boolean contains(int c) {
364             return EmojiProps.INSTANCE.hasBinaryProperty(c, which);
365         }
366     }
367 
368     private class NormInertBinaryProperty extends BinaryProperty {  // UCHAR_NF*_INERT properties
369         int which;
NormInertBinaryProperty(int source, int which)370         NormInertBinaryProperty(int source, int which) {
371             super(source);
372             this.which=which;
373         }
374         @Override
contains(int c)375         boolean contains(int c) {
376             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c);
377         }
378     }
379 
380     BinaryProperty[] binProps={
381         /*
382          * Binary-property implementations must be in order of corresponding UProperty,
383          * and there must be exactly one entry per binary UProperty.
384          */
385         new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)),
386         new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)),
387         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_CONTROL
388             @Override
389             boolean contains(int c) {
390                 return UBiDiProps.INSTANCE.isBidiControl(c);
391             }
392         },
393         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_MIRRORED
394             @Override
395             boolean contains(int c) {
396                 return UBiDiProps.INSTANCE.isMirrored(c);
397             }
398         },
399         new BinaryProperty(1, (1<<DASH_PROPERTY_)),
400         new BinaryProperty(1, (1<<DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_)),
401         new BinaryProperty(1, (1<<DEPRECATED_PROPERTY_)),
402         new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)),
403         new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)),
404         new BinaryProperty(SRC_NFC) {  // UCHAR_FULL_COMPOSITION_EXCLUSION
405             @Override
406             boolean contains(int c) {
407                 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
408                 Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl;
409                 return impl.isCompNo(impl.getNorm16(c));
410             }
411         },
412         new BinaryProperty(1, (1<<GRAPHEME_BASE_PROPERTY_)),
413         new BinaryProperty(1, (1<<GRAPHEME_EXTEND_PROPERTY_)),
414         new BinaryProperty(1, (1<<GRAPHEME_LINK_PROPERTY_)),
415         new BinaryProperty(1, (1<<HEX_DIGIT_PROPERTY_)),
416         new BinaryProperty(1, (1<<HYPHEN_PROPERTY_)),
417         new BinaryProperty(1, (1<<ID_CONTINUE_PROPERTY_)),
418         new BinaryProperty(1, (1<<ID_START_PROPERTY_)),
419         new BinaryProperty(1, (1<<IDEOGRAPHIC_PROPERTY_)),
420         new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)),
421         new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)),
422         new BinaryProperty(SRC_BIDI) {  // UCHAR_JOIN_CONTROL
423             @Override
424             boolean contains(int c) {
425                 return UBiDiProps.INSTANCE.isJoinControl(c);
426             }
427         },
428         new BinaryProperty(1, (1<<LOGICAL_ORDER_EXCEPTION_PROPERTY_)),
429         new CaseBinaryProperty(UProperty.LOWERCASE),
430         new BinaryProperty(1, (1<<MATH_PROPERTY_)),
431         new BinaryProperty(1, (1<<NONCHARACTER_CODE_POINT_PROPERTY_)),
432         new BinaryProperty(1, (1<<QUOTATION_MARK_PROPERTY_)),
433         new BinaryProperty(1, (1<<RADICAL_PROPERTY_)),
434         new CaseBinaryProperty(UProperty.SOFT_DOTTED),
435         new BinaryProperty(1, (1<<TERMINAL_PUNCTUATION_PROPERTY_)),
436         new BinaryProperty(1, (1<<UNIFIED_IDEOGRAPH_PROPERTY_)),
437         new CaseBinaryProperty(UProperty.UPPERCASE),
438         new BinaryProperty(1, (1<<WHITE_SPACE_PROPERTY_)),
439         new BinaryProperty(1, (1<<XID_CONTINUE_PROPERTY_)),
440         new BinaryProperty(1, (1<<XID_START_PROPERTY_)),
441         new CaseBinaryProperty(UProperty.CASE_SENSITIVE),
442         new BinaryProperty(1, (1<<S_TERM_PROPERTY_)),
443         new BinaryProperty(1, (1<<VARIATION_SELECTOR_PROPERTY_)),
444         new NormInertBinaryProperty(SRC_NFC, UProperty.NFD_INERT),
445         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKD_INERT),
446         new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT),
447         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT),
448         new BinaryProperty(SRC_NFC_CANON_ITER) {  // UCHAR_SEGMENT_STARTER
449             @Override
450             boolean contains(int c) {
451                 return Norm2AllModes.getNFCInstance().impl.
452                     ensureCanonIterData().isCanonSegmentStarter(c);
453             }
454         },
455         new BinaryProperty(1, (1<<PATTERN_SYNTAX)),
456         new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)),
457         new BinaryProperty(SRC_CHAR_AND_PROPSVEC) {  // UCHAR_POSIX_ALNUM
458             @Override
459             boolean contains(int c) {
460                 return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c);
461             }
462         },
463         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_BLANK
464             @Override
465             boolean contains(int c) {
466                 // "horizontal space"
467                 if(c<=0x9f) {
468                     return c==9 || c==0x20; /* TAB or SPACE */
469                 } else {
470                     /* Zs */
471                     return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR;
472                 }
473             }
474         },
475         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_GRAPH
476             @Override
477             boolean contains(int c) {
478                 return isgraphPOSIX(c);
479             }
480         },
481         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_PRINT
482             @Override
483             boolean contains(int c) {
484                 /*
485                  * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
486                  *
487                  * The only cntrl character in graph+blank is TAB (in blank).
488                  * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
489                  */
490                 return (UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(c);
491             }
492         },
493         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_XDIGIT
494             @Override
495             boolean contains(int c) {
496                 /* check ASCII and Fullwidth ASCII a-fA-F */
497                 if(
498                     (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
499                     (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
500                 ) {
501                     return true;
502                 }
503                 return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER;
504             }
505         },
506         new CaseBinaryProperty(UProperty.CASED),
507         new CaseBinaryProperty(UProperty.CASE_IGNORABLE),
508         new CaseBinaryProperty(UProperty.CHANGES_WHEN_LOWERCASED),
509         new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED),
510         new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED),
511         new BinaryProperty(SRC_CASE_AND_NORM) {  // UCHAR_CHANGES_WHEN_CASEFOLDED
512             @Override
513             boolean contains(int c) {
514                 String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c);
515                 if(nfd!=null) {
516                     /* c has a decomposition */
517                     c=nfd.codePointAt(0);
518                     if(Character.charCount(c)!=nfd.length()) {
519                         /* multiple code points */
520                         c=-1;
521                     }
522                 } else if(c<0) {
523                     return false;  /* protect against bad input */
524                 }
525                 if(c>=0) {
526                     /* single code point */
527                     UCaseProps csp=UCaseProps.INSTANCE;
528                     UCaseProps.dummyStringBuilder.setLength(0);
529                     return csp.toFullFolding(c, UCaseProps.dummyStringBuilder,
530                                              UCharacter.FOLD_CASE_DEFAULT)>=0;
531                 } else {
532                     String folded=UCharacter.foldCase(nfd, true);
533                     return !folded.equals(nfd);
534                 }
535             }
536         },
537         new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED),
538         new BinaryProperty(SRC_NFKC_CF) {  // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
539             @Override
540             boolean contains(int c) {
541                 Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl;
542                 String src=UTF16.valueOf(c);
543                 StringBuilder dest=new StringBuilder();
544                 // Small destCapacity for NFKC_CF(c).
545                 Normalizer2Impl.ReorderingBuffer buffer=new Normalizer2Impl.ReorderingBuffer(kcf, dest, 5);
546                 kcf.compose(src, 0, src.length(), false, true, buffer);
547                 return !Normalizer2Impl.UTF16Plus.equal(dest, src);
548             }
549         },
550         new EmojiBinaryProperty(UProperty.EMOJI),
551         new EmojiBinaryProperty(UProperty.EMOJI_PRESENTATION),
552         new EmojiBinaryProperty(UProperty.EMOJI_MODIFIER),
553         new EmojiBinaryProperty(UProperty.EMOJI_MODIFIER_BASE),
554         new EmojiBinaryProperty(UProperty.EMOJI_COMPONENT),
555         new BinaryProperty(SRC_PROPSVEC) {  // REGIONAL_INDICATOR
556             // Property starts are a subset of lb=RI etc.
557             @Override
558             boolean contains(int c) {
559                 return 0x1F1E6<=c && c<=0x1F1FF;
560             }
561         },
562         new BinaryProperty(1, 1<<PREPENDED_CONCATENATION_MARK),
563         new EmojiBinaryProperty(UProperty.EXTENDED_PICTOGRAPHIC),
564         new EmojiBinaryProperty(UProperty.BASIC_EMOJI),
565         new EmojiBinaryProperty(UProperty.EMOJI_KEYCAP_SEQUENCE),
566         new EmojiBinaryProperty(UProperty.RGI_EMOJI_MODIFIER_SEQUENCE),
567         new EmojiBinaryProperty(UProperty.RGI_EMOJI_FLAG_SEQUENCE),
568         new EmojiBinaryProperty(UProperty.RGI_EMOJI_TAG_SEQUENCE),
569         new EmojiBinaryProperty(UProperty.RGI_EMOJI_ZWJ_SEQUENCE),
570         new EmojiBinaryProperty(UProperty.RGI_EMOJI),
571     };
572 
hasBinaryProperty(int c, int which)573     public boolean hasBinaryProperty(int c, int which) {
574          if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) {
575             // not a known binary property
576             return false;
577         } else {
578             return binProps[which].contains(c);
579         }
580     }
581 
582     // int-value and enumerated properties --------------------------------- ***
583 
getType(int c)584     public int getType(int c) {
585         return getProperty(c)&TYPE_MASK;
586     }
587 
588     /*
589      * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
590      * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
591      */
592     private static final int /* UHangulSyllableType */ gcbToHst[]={
593         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
594         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
595         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
596         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
597         HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
598         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
599         HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
600         HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
601         HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
602         HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
603         /*
604          * Omit GCB values beyond what we need for hst.
605          * The code below checks for the array length.
606          */
607     };
608 
609     private class IntProperty {
610         int column;  // SRC_PROPSVEC column, or "source" if mask==0
611         int mask;
612         int shift;
IntProperty(int column, int mask, int shift)613         IntProperty(int column, int mask, int shift) {
614             this.column=column;
615             this.mask=mask;
616             this.shift=shift;
617         }
IntProperty(int source)618         IntProperty(int source) {
619             this.column=source;
620             this.mask=0;
621         }
getSource()622         final int getSource() {
623             return mask==0 ? column : SRC_PROPSVEC;
624         }
getValue(int c)625         int getValue(int c) {
626             // systematic, directly stored properties
627             return (getAdditional(c, column)&mask)>>>shift;
628         }
getMaxValue(int which)629         int getMaxValue(int which) {
630             return (getMaxValues(column)&mask)>>>shift;
631         }
632     }
633 
634     private class BiDiIntProperty extends IntProperty {
BiDiIntProperty()635         BiDiIntProperty() {
636             super(SRC_BIDI);
637         }
638         @Override
getMaxValue(int which)639         int getMaxValue(int which) {
640             return UBiDiProps.INSTANCE.getMaxValue(which);
641         }
642     }
643 
644     private class CombiningClassIntProperty extends IntProperty {
CombiningClassIntProperty(int source)645         CombiningClassIntProperty(int source) {
646             super(source);
647         }
648         @Override
getMaxValue(int which)649         int getMaxValue(int which) {
650             return 0xff;
651         }
652     }
653 
654     private class NormQuickCheckIntProperty extends IntProperty {  // UCHAR_NF*_QUICK_CHECK properties
655         int which;
656         int max;
NormQuickCheckIntProperty(int source, int which, int max)657         NormQuickCheckIntProperty(int source, int which, int max) {
658             super(source);
659             this.which=which;
660             this.max=max;
661         }
662         @Override
getValue(int c)663         int getValue(int c) {
664             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c);
665         }
666         @Override
getMaxValue(int which)667         int getMaxValue(int which) {
668             return max;
669         }
670     }
671 
672     IntProperty intProps[]={  // looked up as intProps[which-UProperty.INT_START]; the constructor checks the length against INT_LIMIT-INT_START
673         new BiDiIntProperty() {  // BIDI_CLASS
674             @Override
675             int getValue(int c) {
676                 return UBiDiProps.INSTANCE.getClass(c);
677             }
678         },
679         new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_),  // BLOCK
680         new CombiningClassIntProperty(SRC_NFC) {  // CANONICAL_COMBINING_CLASS
681             @Override
682             int getValue(int c) {
683                 return Normalizer2.getNFDInstance().getCombiningClass(c);
684             }
685         },
686         new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0),  // DECOMPOSITION_TYPE
687         new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_),  // EAST_ASIAN_WIDTH
688         new IntProperty(SRC_CHAR) {  // GENERAL_CATEGORY
689             @Override
690             int getValue(int c) {
691                 return getType(c);
692             }
693             @Override
694             int getMaxValue(int which) {
695                 return UCharacterCategory.CHAR_CATEGORY_COUNT-1;
696             }
697         },
698         new BiDiIntProperty() {  // JOINING_GROUP
699             @Override
700             int getValue(int c) {
701                 return UBiDiProps.INSTANCE.getJoiningGroup(c);
702             }
703         },
704         new BiDiIntProperty() {  // JOINING_TYPE
705             @Override
706             int getValue(int c) {
707                 return UBiDiProps.INSTANCE.getJoiningType(c);
708             }
709         },
710         new IntProperty(2, LB_MASK, LB_SHIFT),  // LINE_BREAK
711         new IntProperty(SRC_CHAR) {  // NUMERIC_TYPE
712             @Override
713             int getValue(int c) {
714                 return ntvGetType(getNumericTypeValue(getProperty(c)));
715             }
716             @Override
717             int getMaxValue(int which) {
718                 return NumericType.COUNT-1;
719             }
720         },
721         new IntProperty(SRC_PROPSVEC) {  // SCRIPT
722             @Override
723             int getValue(int c) {
724                 return UScript.getScript(c);
725             }
726             @Override
727             int getMaxValue(int which) {
728                 int scriptX=getMaxValues(0)&SCRIPT_X_MASK;  // keep only the script bits of the word-0 maximum
729                 return mergeScriptCodeOrIndex(scriptX);  // join the split high/low script bits into one value
730             }
731         },
732         new IntProperty(SRC_PROPSVEC) {  // HANGUL_SYLLABLE_TYPE
733             @Override
734             int getValue(int c) {
735                 /* see comments on gcbToHst[] above */
736                 int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
737                 if(gcb<gcbToHst.length) {
738                     return gcbToHst[gcb];
739                 } else {
740                     return HangulSyllableType.NOT_APPLICABLE;  // GCB value beyond the mapping table
741                 }
742             }
743             @Override
744             int getMaxValue(int which) {
745                 return HangulSyllableType.COUNT-1;
746             }
747         },
748         // max=1=YES -- these are never "maybe", only "no" or "yes"
749         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFD_QUICK_CHECK, 1),
750         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKD_QUICK_CHECK, 1),
751         // max=2=MAYBE
752         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2),
753         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2),
754         new CombiningClassIntProperty(SRC_NFC) {  // LEAD_CANONICAL_COMBINING_CLASS
755             @Override
756             int getValue(int c) {
757                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8;  // upper byte of the FCD16 value
758             }
759         },
760         new CombiningClassIntProperty(SRC_NFC) {  // TRAIL_CANONICAL_COMBINING_CLASS
761             @Override
762             int getValue(int c) {
763                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff;  // lower byte of the FCD16 value
764             }
765         },
766         new IntProperty(2, GCB_MASK, GCB_SHIFT),  // GRAPHEME_CLUSTER_BREAK
767         new IntProperty(2, SB_MASK, SB_SHIFT),  // SENTENCE_BREAK
768         new IntProperty(2, WB_MASK, WB_SHIFT),  // WORD_BREAK
769         new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
770             @Override
771             int getValue(int c) {
772                 return UBiDiProps.INSTANCE.getPairedBracketType(c);
773             }
774         },
775         new IntProperty(SRC_INPC) {  // presumably INDIC_POSITIONAL_CATEGORY (InPC) -- confirm against UProperty order
776             @Override
777             int getValue(int c) {
778                 CodePointTrie trie = LayoutProps.INSTANCE.inpcTrie;
779                 return trie != null ? trie.get(c) : 0;  // 0 when the trie is null
780             }
781             @Override
782             int getMaxValue(int which) {
783                 return LayoutProps.INSTANCE.maxInpcValue;
784             }
785         },
786         new IntProperty(SRC_INSC) {  // presumably INDIC_SYLLABIC_CATEGORY (InSC) -- confirm against UProperty order
787             @Override
788             int getValue(int c) {
789                 CodePointTrie trie = LayoutProps.INSTANCE.inscTrie;
790                 return trie != null ? trie.get(c) : 0;  // 0 when the trie is null
791             }
792             @Override
793             int getMaxValue(int which) {
794                 return LayoutProps.INSTANCE.maxInscValue;
795             }
796         },
797         new IntProperty(SRC_VO) {  // presumably VERTICAL_ORIENTATION (vo) -- confirm against UProperty order
798             @Override
799             int getValue(int c) {
800                 CodePointTrie trie = LayoutProps.INSTANCE.voTrie;
801                 return trie != null ? trie.get(c) : 0;  // 0 when the trie is null
802             }
803             @Override
804             int getMaxValue(int which) {
805                 return LayoutProps.INSTANCE.maxVoValue;
806             }
807         },
808     };
809 
getIntPropertyValue(int c, int which)810     public int getIntPropertyValue(int c, int which) {
811         if(which<UProperty.INT_START) {
812             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
813                 return binProps[which].contains(c) ? 1 : 0;
814             }
815         } else if(which<UProperty.INT_LIMIT) {
816             return intProps[which-UProperty.INT_START].getValue(c);
817         } else if (which == UProperty.GENERAL_CATEGORY_MASK) {
818             return getMask(getType(c));
819         }
820         return 0; // undefined
821     }
822 
getIntPropertyMaxValue(int which)823     public int getIntPropertyMaxValue(int which) {
824         if(which<UProperty.INT_START) {
825             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
826                 return 1;  // maximum TRUE for all binary properties
827             }
828         } else if(which<UProperty.INT_LIMIT) {
829             return intProps[which-UProperty.INT_START].getMaxValue(which);
830         }
831         return -1; // undefined
832     }
833 
getSource(int which)834     final int getSource(int which) {
835         if(which<UProperty.BINARY_START) {
836             return SRC_NONE; /* undefined */
837         } else if(which<UProperty.BINARY_LIMIT) {
838             return binProps[which].getSource();
839         } else if(which<UProperty.INT_START) {
840             return SRC_NONE; /* undefined */
841         } else if(which<UProperty.INT_LIMIT) {
842             return intProps[which-UProperty.INT_START].getSource();
843         } else if(which<UProperty.STRING_START) {
844             switch(which) {
845             case UProperty.GENERAL_CATEGORY_MASK:
846             case UProperty.NUMERIC_VALUE:
847                 return SRC_CHAR;
848 
849             default:
850                 return SRC_NONE;
851             }
852         } else if(which<UProperty.STRING_LIMIT) {
853             switch(which) {
854             case UProperty.AGE:
855                 return SRC_PROPSVEC;
856 
857             case UProperty.BIDI_MIRRORING_GLYPH:
858                 return SRC_BIDI;
859 
860             case UProperty.CASE_FOLDING:
861             case UProperty.LOWERCASE_MAPPING:
862             case UProperty.SIMPLE_CASE_FOLDING:
863             case UProperty.SIMPLE_LOWERCASE_MAPPING:
864             case UProperty.SIMPLE_TITLECASE_MAPPING:
865             case UProperty.SIMPLE_UPPERCASE_MAPPING:
866             case UProperty.TITLECASE_MAPPING:
867             case UProperty.UPPERCASE_MAPPING:
868                 return SRC_CASE;
869 
870             case UProperty.ISO_COMMENT:
871             case UProperty.NAME:
872             case UProperty.UNICODE_1_NAME:
873                 return SRC_NAMES;
874 
875             default:
876                 return SRC_NONE;
877             }
878         } else {
879             switch(which) {
880             case UProperty.SCRIPT_EXTENSIONS:
881                 return SRC_PROPSVEC;
882             default:
883                 return SRC_NONE; /* undefined */
884             }
885         }
886     }
887 
888     /**
889      * <p>
890      * Unicode property names and property value names are compared
891      * "loosely". Property[Value]Aliases.txt say:
892      * <quote>
893      *   "With loose matching of property names, the case distinctions,
894      *    whitespace, and '_' are ignored."
895      * </quote>
896      * </p>
897      * <p>
898      * This function does just that, for ASCII (char *) name strings.
899      * It is almost identical to ucnv_compareNames() but also ignores
900      * ASCII White_Space characters (U+0009..U+000d).
901      * </p>
902      * @param name1 name to compare
903      * @param name2 name to compare
904      * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0
905      *         if name1 is greater than name2.
906      */
907     /* to be implemented in 2.4
908      * public static int comparePropertyNames(String name1, String name2)
909     {
910         int result = 0;
911         int i1 = 0;
912         int i2 = 0;
913         while (true) {
914             char ch1 = 0;
915             char ch2 = 0;
916             // Ignore delimiters '-', '_', and ASCII White_Space
917             if (i1 < name1.length()) {
918                 ch1 = name1.charAt(i1 ++);
919             }
920             while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t'
921                    || ch1 == '\n' // synwee what is || ch1 == '\v'
922                    || ch1 == '\f' || ch1=='\r') {
923                 if (i1 < name1.length()) {
924                     ch1 = name1.charAt(i1 ++);
925                 }
926                 else {
927                     ch1 = 0;
928                 }
929             }
930             if (i2 < name2.length()) {
931                 ch2 = name2.charAt(i2 ++);
932             }
933             while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t'
934                    || ch2 == '\n' // synwee what is || ch1 == '\v'
935                    || ch2 == '\f' || ch2=='\r') {
936                 if (i2 < name2.length()) {
937                     ch2 = name2.charAt(i2 ++);
938                 }
939                 else {
940                     ch2 = 0;
941                 }
942             }
943 
944             // If we reach the ends of both strings then they match
945             if (ch1 == 0 && ch2 == 0) {
946                 return 0;
947             }
948 
949             // Case-insensitive comparison
950             if (ch1 != ch2) {
951                 result = Character.toLowerCase(ch1)
952                                                 - Character.toLowerCase(ch2);
953                 if (result != 0) {
954                     return result;
955                 }
956             }
957         }
958     }
959     */
960 
961     /**
962      * Get the the maximum values for some enum/int properties.
963      * @return maximum values for the integer properties.
964      */
getMaxValues(int column)965     public int getMaxValues(int column)
966     {
967        // return m_maxBlockScriptValue_;
968 
969         switch(column) {
970         case 0:
971             return m_maxBlockScriptValue_;
972         case 2:
973             return m_maxJTGValue_;
974         default:
975             return 0;
976         }
977     }
978 
979     /**
980      * Gets the type mask
981      * @param type character type
982      * @return mask
983      */
getMask(int type)984     public static final int getMask(int type)
985     {
986         return 1 << type;
987     }
988 
989 
990     /**
991      * Returns the digit values of characters like 'A' - 'Z', normal,
992      * half-width and full-width. This method assumes that the other digit
993      * characters are checked by the calling method.
994      * @param ch character to test
995      * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
996      *         its corresponding digit will be returned.
997      */
getEuropeanDigit(int ch)998     public static int getEuropeanDigit(int ch) {
999         if ((ch > 0x7a && ch < 0xff21)
1000             || ch < 0x41 || (ch > 0x5a && ch < 0x61)
1001             || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
1002             return -1;
1003         }
1004         if (ch <= 0x7a) {
1005             // ch >= 0x41 or ch < 0x61
1006             return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
1007         }
1008         // ch >= 0xff21
1009         if (ch <= 0xff3a) {
1010             return ch + 10 - 0xff21;
1011         }
1012         // ch >= 0xff41 && ch <= 0xff5a
1013         return ch + 10 - 0xff41;
1014     }
1015 
digit(int c)1016     public int digit(int c) {
1017         int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
1018         if(value<=9) {
1019             return value;
1020         } else {
1021             return -1;
1022         }
1023     }
1024 
getNumericValue(int c)1025     public int getNumericValue(int c) {
1026         // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
1027         int ntv = getNumericTypeValue(getProperty(c));
1028 
1029         if(ntv==NTV_NONE_) {
1030             return getEuropeanDigit(c);
1031         } else if(ntv<NTV_DIGIT_START_) {
1032             /* decimal digit */
1033             return ntv-NTV_DECIMAL_START_;
1034         } else if(ntv<NTV_NUMERIC_START_) {
1035             /* other digit */
1036             return ntv-NTV_DIGIT_START_;
1037         } else if(ntv<NTV_FRACTION_START_) {
1038             /* small integer */
1039             return ntv-NTV_NUMERIC_START_;
1040         } else if(ntv<NTV_LARGE_START_) {
1041             /* fraction */
1042             return -2;
1043         } else if(ntv<NTV_BASE60_START_) {
1044             /* large, single-significant-digit integer */
1045             int mant=(ntv>>5)-14;
1046             int exp=(ntv&0x1f)+2;
1047             if(exp<9 || (exp==9 && mant<=2)) {
1048                 int numValue=mant;
1049                 do {
1050                     numValue*=10;
1051                 } while(--exp>0);
1052                 return numValue;
1053             } else {
1054                 return -2;
1055             }
1056         } else if(ntv<NTV_FRACTION20_START_) {
1057             /* sexagesimal (base 60) integer */
1058             int numValue=(ntv>>2)-0xbf;
1059             int exp=(ntv&3)+1;
1060 
1061             switch(exp) {
1062             case 4:
1063                 numValue*=60*60*60*60;
1064                 break;
1065             case 3:
1066                 numValue*=60*60*60;
1067                 break;
1068             case 2:
1069                 numValue*=60*60;
1070                 break;
1071             case 1:
1072                 numValue*=60;
1073                 break;
1074             case 0:
1075             default:
1076                 break;
1077             }
1078 
1079             return numValue;
1080         } else if(ntv<NTV_RESERVED_START_) {
1081             // fraction-20 e.g. 3/80
1082             return -2;
1083         } else {
1084             /* reserved */
1085             return -2;
1086         }
1087     }
1088 
getUnicodeNumericValue(int c)1089     public double getUnicodeNumericValue(int c) {
1090         // equivalent to c version double u_getNumericValue(UChar32 c)
1091         int ntv = getNumericTypeValue(getProperty(c));
1092 
1093         if(ntv==NTV_NONE_) {
1094             return UCharacter.NO_NUMERIC_VALUE;
1095         } else if(ntv<NTV_DIGIT_START_) {
1096             /* decimal digit */
1097             return ntv-NTV_DECIMAL_START_;
1098         } else if(ntv<NTV_NUMERIC_START_) {
1099             /* other digit */
1100             return ntv-NTV_DIGIT_START_;
1101         } else if(ntv<NTV_FRACTION_START_) {
1102             /* small integer */
1103             return ntv-NTV_NUMERIC_START_;
1104         } else if(ntv<NTV_LARGE_START_) {
1105             /* fraction */
1106             int numerator=(ntv>>4)-12;
1107             int denominator=(ntv&0xf)+1;
1108             return (double)numerator/denominator;
1109         } else if(ntv<NTV_BASE60_START_) {
1110             /* large, single-significant-digit integer */
1111             double numValue;
1112             int mant=(ntv>>5)-14;
1113             int exp=(ntv&0x1f)+2;
1114             numValue=mant;
1115 
1116             /* multiply by 10^exp without math.h */
1117             while(exp>=4) {
1118                 numValue*=10000.;
1119                 exp-=4;
1120             }
1121             switch(exp) {
1122             case 3:
1123                 numValue*=1000.;
1124                 break;
1125             case 2:
1126                 numValue*=100.;
1127                 break;
1128             case 1:
1129                 numValue*=10.;
1130                 break;
1131             case 0:
1132             default:
1133                 break;
1134             }
1135 
1136             return numValue;
1137         } else if(ntv<NTV_FRACTION20_START_) {
1138             /* sexagesimal (base 60) integer */
1139             int numValue=(ntv>>2)-0xbf;
1140             int exp=(ntv&3)+1;
1141 
1142             switch(exp) {
1143             case 4:
1144                 numValue*=60*60*60*60;
1145                 break;
1146             case 3:
1147                 numValue*=60*60*60;
1148                 break;
1149             case 2:
1150                 numValue*=60*60;
1151                 break;
1152             case 1:
1153                 numValue*=60;
1154                 break;
1155             case 0:
1156             default:
1157                 break;
1158             }
1159 
1160             return numValue;
1161         } else if(ntv<NTV_FRACTION32_START_) {
1162             // fraction-20 e.g. 3/80
1163             int frac20=ntv-NTV_FRACTION20_START_;  // 0..0x17
1164             int numerator=2*(frac20&3)+1;
1165             int denominator=20<<(frac20>>2);
1166             return (double)numerator/denominator;
1167         } else if(ntv<NTV_RESERVED_START_) {
1168             // fraction-32 e.g. 3/64
1169             int frac32=ntv-NTV_FRACTION32_START_;  // 0..15
1170             int numerator=2*(frac32&3)+1;
1171             int denominator=32<<(frac32>>2);
1172             return (double)numerator/denominator;
1173         } else {
1174             /* reserved */
1175             return UCharacter.NO_NUMERIC_VALUE;
1176         }
1177     }
1178 
1179     // protected variables -----------------------------------------------
1180 
1181     /**
1182      * Extra property trie; only created by the constructor when
          * m_additionalColumnsCount_ is greater than 0
1183      */
1184     Trie2_16 m_additionalTrie_;
1185     /**
1186      * Extra property vectors, 1st column for age and second for binary
1187      * properties. Only read by the constructor when m_additionalColumnsCount_ is greater than 0.
1188      */
1189     int m_additionalVectors_[];
1190     /**
1191      * Number of additional columns, read from the data file indexes
1192      */
1193     int m_additionalColumnsCount_;
1194     /**
1195      * Maximum values for block and script, bits used as in vector word
1196      * 0; returned by getMaxValues(0)
1197      */
1198     int m_maxBlockScriptValue_;
1199     /**
1200      * Maximum values returned by getMaxValues(2); presumably the bits are
1201      * used as in vector word 2 (the earlier text here said "script ... word 0") -- confirm
1202      */
1203      int m_maxJTGValue_;
1204 
1205     /**
1206      * Script_Extensions data; left unassigned when the data file holds none
1207      */
1208     public char[] m_scriptExtensions_;
1209 
1210     // private variables -------------------------------------------------
1211 
1212     /**
1213     * Default name of the datafile
1214     */
1215     private static final String DATA_FILE_NAME_ = "uprops.icu";
1216 
1217     // property data constants -------------------------------------------------
1218 
1219     /**
1220      * Numeric types and values in the main properties words.
1221      */
1222     private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
getNumericTypeValue(int props)1223     private static final int getNumericTypeValue(int props) {
1224         return props >> NUMERIC_TYPE_VALUE_SHIFT_;
1225     }
1226     /* constants for the storage form of numeric types and values; each *_START_ value is the first code of its range and the exclusive upper limit of the range before it (see the ntv<..._START_ chains in getNumericValue() and getUnicodeNumericValue()) */
1227     /** No numeric value. */
1228     private static final int NTV_NONE_ = 0;
1229     /** Decimal digits: nv=0..9 */
1230     private static final int NTV_DECIMAL_START_ = 1;
1231     /** Other digits: nv=0..9 */
1232     private static final int NTV_DIGIT_START_ = 11;
1233     /** Small integers: nv=0..154 */
1234     private static final int NTV_NUMERIC_START_ = 21;
1235     /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
1236     private static final int NTV_FRACTION_START_ = 0xb0;
1237     /**
1238      * Large integers:
1239      * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
1240      * (only one significant decimal digit)
1241      */
1242     private static final int NTV_LARGE_START_ = 0x1e0;
1243     /**
1244      * Sexagesimal numbers:
1245      * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
1246      */
1247     private static final int NTV_BASE60_START_=0x300;
1248     /**
1249      * Fraction-20 values:
1250      * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
1251      * numerator: num = 2*(frac20&3)+1
1252      * denominator: den = 20<<(frac20>>2)
1253      */
1254     private static final int NTV_FRACTION20_START_ = NTV_BASE60_START_ + 36;  // 0x300+9*4=0x324
1255     /**
1256      * Fraction-32 values:
1257      * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
1258      * numerator: num = 2*(frac32&3)+1
1259      * denominator: den = 32<<(frac32>>2)
1260      */
1261     private static final int NTV_FRACTION32_START_ = NTV_FRACTION20_START_ + 24;  // 0x324+6*4=0x34c
1262     /** No numeric value (yet). */
1263     private static final int NTV_RESERVED_START_ = NTV_FRACTION32_START_ + 16;  // 0x34c+4*4=0x35c
1264 
ntvGetType(int ntv)1265     private static final int ntvGetType(int ntv) {
1266         return
1267             (ntv==NTV_NONE_) ? NumericType.NONE :
1268             (ntv<NTV_DIGIT_START_) ?  NumericType.DECIMAL :
1269             (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
1270             NumericType.NUMERIC;
1271     }
1272 
1273     /*
1274      * Properties in vector word 0
1275      * Bits
1276      * 31..24   DerivedAge version major/minor one nibble each
1277      * 23..22   3..1: Bits 21..20 & 7..0 = Script_Extensions index
1278      *             3: Script value from Script_Extensions
1279      *             2: Script=Inherited
1280      *             1: Script=Common
1281      *             0: Script=bits 21..20 & 7..0
1282      * 21..20   Bits 9..8 of the UScriptCode, or index to Script_Extensions
1283      * 19..17   East Asian Width
1284      * 16.. 8   UBlockCode
1285      *  7.. 0   UScriptCode, or index to Script_Extensions
1286      */
1287 
1288     /**
1289      * Script_Extensions: mask includes Script
1290      */
1291     public static final int SCRIPT_X_MASK = 0x00f000ff;  // bits 23..20 and 7..0
1292     //private static final int SCRIPT_X_SHIFT = 22;
1293 
1294     // The UScriptCode or Script_Extensions index is split across two bit fields.
1295     // (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
1296     // Shift the high bits right by 12 to assemble the full value.
1297     public static final int SCRIPT_HIGH_MASK = 0x00300000;  // bits 21..20
1298     public static final int SCRIPT_HIGH_SHIFT = 12;  // moves bits 21..20 down to bits 9..8
1299     public static final int MAX_SCRIPT = 0x3ff;  // all 10 bits set (2 high + 8 low)
1300 
1301     /**
1302      * Integer properties mask and shift values for East Asian cell width.
1303      * Equivalent to icu4c UPROPS_EA_MASK
1304      */
1305     private static final int EAST_ASIAN_MASK_ = 0x000e0000;
1306     /**
1307      * Integer properties mask and shift values for East Asian cell width.
1308      * Equivalent to icu4c UPROPS_EA_SHIFT
1309      */
1310     private static final int EAST_ASIAN_SHIFT_ = 17;
1311     /**
1312      * Integer properties mask and shift values for blocks.
1313      * Equivalent to icu4c UPROPS_BLOCK_MASK
1314      */
1315     private static final int BLOCK_MASK_ = 0x0001ff00;
1316     /**
1317      * Integer properties mask and shift values for blocks.
1318      * Equivalent to icu4c UPROPS_BLOCK_SHIFT
1319      */
1320     private static final int BLOCK_SHIFT_ = 8;
1321     /**
1322      * Mask for the low 8 bits (7..0) of the script code or Script_Extensions index.
1323      * Equivalent to icu4c UPROPS_SHIFT_LOW_MASK (NOTE(review): possibly meant UPROPS_SCRIPT_LOW_MASK -- confirm against icu4c).
1324      */
1325     public static final int SCRIPT_LOW_MASK = 0x000000ff;
1326 
1327     /* SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
1328     public static final int SCRIPT_X_WITH_COMMON = 0x400000;  // bits 23..22 = 1: Script=Common
1329     public static final int SCRIPT_X_WITH_INHERITED = 0x800000;  // bits 23..22 = 2: Script=Inherited
1330     public static final int SCRIPT_X_WITH_OTHER = 0xc00000;  // bits 23..22 = 3: Script value from Script_Extensions
1331 
mergeScriptCodeOrIndex(int scriptX)1332     public static final int mergeScriptCodeOrIndex(int scriptX) {
1333         return
1334             ((scriptX & SCRIPT_HIGH_MASK) >> SCRIPT_HIGH_SHIFT) |
1335             (scriptX & SCRIPT_LOW_MASK);
1336     }
1337 
1338     /**
1339      * Additional properties used in internal trie data
1340      */
1341     /*
1342      * Properties in vector word 1
1343      * Each bit encodes one binary property.
1344      * The following constants represent the bit number, use 1<<UPROPS_XYZ.
1345      * UPROPS_BINARY_1_TOP<=32! (the constants below use every bit number from 0 to 31)
1346      *
1347      * Keep this list of property enums in sync with
1348      * propListNames[] in icu/source/tools/genprops/props2.c!
1349      *
1350      * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
1351      */
1352     private static final int WHITE_SPACE_PROPERTY_ = 0;
1353     private static final int DASH_PROPERTY_ = 1;
1354     private static final int HYPHEN_PROPERTY_ = 2;
1355     private static final int QUOTATION_MARK_PROPERTY_ = 3;
1356     private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
1357     private static final int MATH_PROPERTY_ = 5;
1358     private static final int HEX_DIGIT_PROPERTY_ = 6;
1359     private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
1360     private static final int ALPHABETIC_PROPERTY_ = 8;
1361     private static final int IDEOGRAPHIC_PROPERTY_ = 9;
1362     private static final int DIACRITIC_PROPERTY_ = 10;
1363     private static final int EXTENDER_PROPERTY_ = 11;
1364     private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
1365     private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
1366     private static final int GRAPHEME_LINK_PROPERTY_ = 14;
1367     private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
1368     private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
1369     private static final int RADICAL_PROPERTY_ = 17;
1370     private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
1371     private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
1372     private static final int DEPRECATED_PROPERTY_ = 20;
1373     private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
1374     private static final int XID_START_PROPERTY_ = 22;
1375     private static final int XID_CONTINUE_PROPERTY_ = 23;
1376     private static final int ID_START_PROPERTY_    = 24;
1377     private static final int ID_CONTINUE_PROPERTY_ = 25;
1378     private static final int GRAPHEME_BASE_PROPERTY_ = 26;
1379     private static final int S_TERM_PROPERTY_ = 27;
1380     private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
1381     private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
1382     private static final int PATTERN_WHITE_SPACE = 30;
1383     private static final int PREPENDED_CONCATENATION_MARK = 31;     // new in ICU 60 and Unicode 10; highest bit number in the word
1384 
1385     /*
1386      * Properties in vector word 2
1387      * Bits
1388      * 31..26   unused since ICU 70 added uemoji.icu;
1389      *          in ICU 57..69 stored emoji properties
1390      * 25..20   Line Break
1391      * 19..15   Sentence Break
1392      * 14..10   Word Break
1393      *  9.. 5   Grapheme Cluster Break
1394      *  4.. 0   Decomposition Type
1395      */
1396     //ivate static final int PROPS_2_EXTENDED_PICTOGRAPHIC=26;  // ICU 62..69
1397     //ivate static final int PROPS_2_EMOJI_COMPONENT = 27;  // ICU 60..69
1398     //ivate static final int PROPS_2_EMOJI = 28;  // ICU 57..69
1399     //ivate static final int PROPS_2_EMOJI_PRESENTATION = 29;  // ICU 57..69
1400     //ivate static final int PROPS_2_EMOJI_MODIFIER = 30;  // ICU 57..69
1401     //ivate static final int PROPS_2_EMOJI_MODIFIER_BASE = 31;  // ICU 57..69
1402 
1403     private static final int LB_MASK          = 0x03f00000;  // Line Break, bits 25..20
1404     private static final int LB_SHIFT         = 20;
1405 
1406     private static final int SB_MASK          = 0x000f8000;  // Sentence Break, bits 19..15
1407     private static final int SB_SHIFT         = 15;
1408 
1409     private static final int WB_MASK          = 0x00007c00;  // Word Break, bits 14..10
1410     private static final int WB_SHIFT         = 10;
1411 
1412     private static final int GCB_MASK         = 0x000003e0;  // Grapheme Cluster Break, bits 9..5
1413     private static final int GCB_SHIFT        = 5;
1414 
1415     /**
1416      * Integer properties mask for decomposition type (bits 4..0, so no shift is needed).
1417      * Equivalent to icu4c UPROPS_DT_MASK.
1418      */
1419     private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
1420 
1421     /**
1422      * First nibble shift
1423      */
1424     private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
1425     /**
1426      * Second nibble mask
1427      */
1428     private static final int LAST_NIBBLE_MASK_ = 0xF;
1429     /**
1430      * Age value shift (DerivedAge occupies bits 31..24 of vector word 0)
1431      */
1432     private static final int AGE_SHIFT_ = 24;
1433 
1434 
1435     // private constructors --------------------------------------------------
1436 
1437     /**
1438      * Constructor. Loads the data file named by DATA_FILE_NAME_: checks the
          * header, reads the 16 index ints, the main properties trie, and, when
          * the column count is positive, the additional-properties trie and
          * vectors; finally reads the Script_Extensions data if any is present.
          * Throws an (unchecked) ICUException when the binProps/intProps table
          * lengths do not match the UProperty limits.
1439      * @exception IOException thrown when data reading fails or data corrupted
1440      */
UCharacterProperty()1441     private UCharacterProperty() throws IOException
1442     {
1443         // consistency check
1444         if(binProps.length!=UProperty.BINARY_LIMIT) {
1445             throw new ICUException("binProps.length!=UProperty.BINARY_LIMIT");
1446         }
1447         if(intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)) {
1448             throw new ICUException("intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)");
1449         }
1450 
1451         // jar access
1452         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
1453         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
1454         // Read or skip the 16 indexes.
1455         int propertyOffset = bytes.getInt();
1456         /* exceptionOffset = */ bytes.getInt();
1457         /* caseOffset = */ bytes.getInt();
1458         int additionalOffset = bytes.getInt();
1459         int additionalVectorsOffset = bytes.getInt();
1460         m_additionalColumnsCount_ = bytes.getInt();
1461         int scriptExtensionsOffset = bytes.getInt();
1462         int reservedOffset7 = bytes.getInt();
1463         /* reservedOffset8 = */ bytes.getInt();
1464         /* dataTopOffset = */ bytes.getInt();
1465         m_maxBlockScriptValue_ = bytes.getInt();
1466         m_maxJTGValue_ = bytes.getInt();
1467         ICUBinary.skipBytes(bytes, (16 - 12) << 2);  // 12 indexes consumed above; skip the other 4 ints (4 bytes each)
1468 
1469         // read the main properties trie
1470         m_trie_ = Trie2_16.createFromSerialized(bytes);
1471         int expectedTrieLength = (propertyOffset - 16) * 4;  // offsets are scaled by 4 to get bytes; 16 is the number of index ints
1472         int trieLength = m_trie_.getSerializedLength();
1473         if(trieLength > expectedTrieLength) {
1474             throw new IOException("uprops.icu: not enough bytes for main trie");
1475         }
1476         // skip padding after trie bytes
1477         ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1478 
1479         // skip unused intervening data structures
1480         ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
1481 
1482         if(m_additionalColumnsCount_ > 0) {  // NOTE(review): when this is false nothing is skipped up to scriptExtensionsOffset before the Script_Extensions read below -- confirm such data cannot occur
1483             // reads the additional property block
1484             m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
1485             expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
1486             trieLength = m_additionalTrie_.getSerializedLength();
1487             if(trieLength > expectedTrieLength) {
1488                 throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
1489             }
1490             // skip padding after trie bytes
1491             ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1492 
1493             // additional properties
1494             int size = scriptExtensionsOffset - additionalVectorsOffset;  // count of ints, not bytes
1495             m_additionalVectors_ = ICUBinary.getInts(bytes, size, 0);
1496         }
1497 
1498         // Script_Extensions
1499         int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;  // two chars per offset unit
1500         if(numChars > 0) {
1501             m_scriptExtensions_ = ICUBinary.getChars(bytes, numChars, 0);
1502         }
1503     }
1504 
1505     private static final class IsAcceptable implements ICUBinary.Authenticate {
1506         @Override
isDataVersionAcceptable(byte version[])1507         public boolean isDataVersionAcceptable(byte version[]) {
1508             return version[0] == 7;
1509         }
1510     }
    // Data format identifier: the four bytes 0x55 0x50 0x72 0x6F are the ASCII letters "UPro".
    // NOTE(review): presumably matched against the data file header when loading - confirm at the call site.
    private static final int DATA_FORMAT = 0x5550726F;  // "UPro"
1512 
1513     // private methods -------------------------------------------------------
1514 
1515     /*
1516      * Compare additional properties to see if it has argument type
1517      * @param property 32 bit properties
1518      * @param type character type
1519      * @return true if property has type
1520      */
1521     /*private boolean compareAdditionalType(int property, int type)
1522     {
1523         return (property & (1 << type)) != 0;
1524     }*/
1525 
1526     // property starts for UnicodeSet -------------------------------------- ***
1527 
    // Code points that addPropertyStarts() adds to the set explicitly, in addition
    // to the range starts it collects from the main trie. Its comments group them
    // by the hardcoded check they serve (u_isblank, u_isIDIgnorable, u_isWhitespace,
    // u_digit, u_isxdigit, default-ignorable, grapheme base).
    // Several act as range boundaries there (TAB..CR, DEL..NBSP-1, WJ..NOMDIG),
    // in which case the code point just past the end of the range is added too.
    // The U_FW_* values lie in U+FF21..U+FF5A; "FW" presumably stands for the
    // fullwidth forms of the corresponding ASCII letters - confirm.
    // The commented-out entries are kept from the original and are not referenced.
    private static final int TAB     = 0x0009;
    //private static final int LF      = 0x000a;
    //private static final int FF      = 0x000c;
    private static final int CR      = 0x000d;
    private static final int U_A     = 0x0041;
    private static final int U_F     = 0x0046;
    private static final int U_Z     = 0x005a;
    private static final int U_a     = 0x0061;
    private static final int U_f     = 0x0066;
    private static final int U_z     = 0x007a;
    private static final int DEL     = 0x007f;
    private static final int NL      = 0x0085;
    private static final int NBSP    = 0x00a0;
    private static final int CGJ     = 0x034f;
    private static final int FIGURESP= 0x2007;
    private static final int HAIRSP  = 0x200a;
    //private static final int ZWNJ    = 0x200c;
    //private static final int ZWJ     = 0x200d;
    private static final int RLM     = 0x200f;
    private static final int NNBSP   = 0x202f;
    private static final int WJ      = 0x2060;
    private static final int INHSWAP = 0x206a;
    private static final int NOMDIG  = 0x206f;
    private static final int U_FW_A  = 0xff21;
    private static final int U_FW_F  = 0xff26;
    private static final int U_FW_Z  = 0xff3a;
    private static final int U_FW_a  = 0xff41;
    private static final int U_FW_f  = 0xff46;
    private static final int U_FW_z  = 0xff5a;
    private static final int ZWNBSP  = 0xfeff;
1558 
addPropertyStarts(UnicodeSet set)1559     public UnicodeSet addPropertyStarts(UnicodeSet set) {
1560         /* add the start code point of each same-value range of the main trie */
1561         Iterator<Trie2.Range> trieIterator = m_trie_.iterator();
1562         Trie2.Range range;
1563         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1564             set.add(range.startCodePoint);
1565         }
1566 
1567         /* add code points with hardcoded properties, plus the ones following them */
1568 
1569         /* add for u_isblank() */
1570         set.add(TAB);
1571         set.add(TAB+1);
1572 
1573         /* add for IS_THAT_CONTROL_SPACE() */
1574         set.add(CR+1); /* range TAB..CR */
1575         set.add(0x1c);
1576         set.add(0x1f+1);
1577         set.add(NL);
1578         set.add(NL+1);
1579 
1580         /* add for u_isIDIgnorable() what was not added above */
1581         set.add(DEL); /* range DEL..NBSP-1, NBSP added below */
1582         set.add(HAIRSP);
1583         set.add(RLM+1);
1584         set.add(INHSWAP);
1585         set.add(NOMDIG+1);
1586         set.add(ZWNBSP);
1587         set.add(ZWNBSP+1);
1588 
1589         /* add no-break spaces for u_isWhitespace() what was not added above */
1590         set.add(NBSP);
1591         set.add(NBSP+1);
1592         set.add(FIGURESP);
1593         set.add(FIGURESP+1);
1594         set.add(NNBSP);
1595         set.add(NNBSP+1);
1596 
1597         /* add for u_charDigitValue() */
1598         // TODO remove when UCharacter.getHanNumericValue() is changed to just return
1599         // Unicode numeric values
1600         set.add(0x3007);
1601         set.add(0x3008);
1602         set.add(0x4e00);
1603         set.add(0x4e01);
1604         set.add(0x4e8c);
1605         set.add(0x4e8d);
1606         set.add(0x4e09);
1607         set.add(0x4e0a);
1608         set.add(0x56db);
1609         set.add(0x56dc);
1610         set.add(0x4e94);
1611         set.add(0x4e95);
1612         set.add(0x516d);
1613         set.add(0x516e);
1614         set.add(0x4e03);
1615         set.add(0x4e04);
1616         set.add(0x516b);
1617         set.add(0x516c);
1618         set.add(0x4e5d);
1619         set.add(0x4e5e);
1620 
1621         /* add for u_digit() */
1622         set.add(U_a);
1623         set.add(U_z+1);
1624         set.add(U_A);
1625         set.add(U_Z+1);
1626         set.add(U_FW_a);
1627         set.add(U_FW_z+1);
1628         set.add(U_FW_A);
1629         set.add(U_FW_Z+1);
1630 
1631         /* add for u_isxdigit() */
1632         set.add(U_f+1);
1633         set.add(U_F+1);
1634         set.add(U_FW_f+1);
1635         set.add(U_FW_F+1);
1636 
1637         /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
1638         set.add(WJ); /* range WJ..NOMDIG */
1639         set.add(0xfff0);
1640         set.add(0xfffb+1);
1641         set.add(0xe0000);
1642         set.add(0xe0fff+1);
1643 
1644         /* add for UCHAR_GRAPHEME_BASE and others */
1645         set.add(CGJ);
1646         set.add(CGJ+1);
1647 
1648         return set; // for chaining
1649     }
1650 
upropsvec_addPropertyStarts(UnicodeSet set)1651     public void upropsvec_addPropertyStarts(UnicodeSet set) {
1652         /* add the start code point of each same-value range of the properties vectors trie */
1653         if(m_additionalColumnsCount_>0) {
1654             /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
1655             Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
1656             Trie2.Range range;
1657             while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1658                 set.add(range.startCodePoint);
1659             }
1660         }
1661     }
1662 
ulayout_addPropertyStarts(int src, UnicodeSet set)1663     static UnicodeSet ulayout_addPropertyStarts(int src, UnicodeSet set) {
1664         return LayoutProps.INSTANCE.addPropertyStarts(src, set);
1665     }
1666 
1667     // This static initializer block must be placed after
1668     // other static member initialization
1669     static {
1670         try {
1671             INSTANCE = new UCharacterProperty();
1672         }
1673         catch (IOException e) {
1674             throw new MissingResourceException(e.getMessage(),"","");
1675         }
1676     }
1677 
1678 /*----------------------------------------------------------------
1679  * Inclusions list
1680  *----------------------------------------------------------------*/
1681 
1682     /*
1683      * Return a set of characters for property enumeration.
1684      * The set implicitly contains 0x110000 as well, which is one more than the highest
1685      * Unicode code point.
1686      *
1687      * This set is used as an ordered list - its code points are ordered, and
1688      * consecutive code points (in Unicode code point order) in the set define a range.
1689      * For each two consecutive characters (start, limit) in the set,
1690      * all of the UCD/normalization and related properties for
1691      * all code points start..limit-1 are all the same,
1692      * except for character names and ISO comments.
1693      *
1694      * All Unicode code points U+0000..U+10ffff are covered by these ranges.
1695      * The ranges define a partition of the Unicode code space.
1696      * ICU uses the inclusions set to enumerate properties for generating
1697      * UnicodeSets containing all code points that have a certain property value.
1698      *
1699      * The Inclusion List is generated from the UCD. It is generated
1700      * by enumerating the data tries, and code points for hardcoded properties
1701      * are added as well.
1702      *
1703      * --------------------------------------------------------------------------
1704      *
1705      * The following are ideas for getting properties-unique code point ranges,
1706      * with possible optimizations beyond the current implementation.
1707      * These optimizations would require more code and be more fragile.
1708      * The current implementation generates one single list (set) for all properties.
1709      *
1710      * To enumerate properties efficiently, one needs to know ranges of
1711      * repetitive values, so that the value of only each start code point
1712      * can be applied to the whole range.
1713      * This information is in principle available in the uprops.icu/unorm.icu data.
1714      *
1715      * There are two obstacles:
1716      *
1717      * 1. Some properties are computed from multiple data structures,
1718      *    making it necessary to get repetitive ranges by intersecting
1719      *    ranges from multiple tries.
1720      *
1721      * 2. It is not economical to write code for getting repetitive ranges
1722      *    that are precise for each of some 50 properties.
1723      *
1724      * Compromise ideas:
1725      *
1726      * - Get ranges per trie, not per individual property.
1727      *   Each range contains the same values for a whole group of properties.
1728      *   This would generate currently five range sets, two for uprops.icu tries
1729      *   and three for unorm.icu tries.
1730      *
1731      * - Combine sets of ranges for multiple tries to get sufficient sets
1732      *   for properties, e.g., the uprops.icu main and auxiliary tries
1733      *   for all non-normalization properties.
1734      *
1735      * Ideas for representing ranges and combining them:
1736      *
1737      * - A UnicodeSet could hold just the start code points of ranges.
1738      *   Multiple sets are easily combined by or-ing them together.
1739      *
1740      * - Alternatively, a UnicodeSet could hold each even-numbered range.
1741      *   All ranges could be enumerated by using each start code point
1742      *   (for the even-numbered ranges) as well as each limit (end+1) code point
1743      *   (for the odd-numbered ranges).
1744      *   It should be possible to combine two such sets by xor-ing them,
1745      *   but no more than two.
1746      *
1747      * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
1748      * but the first one is certainly simpler and applicable for combining more than
1749      * two range sets.
1750      *
1751      * It is possible to combine all range sets for all uprops/unorm tries into one
1752      * set that can be used for all properties.
1753      * As an optimization, there could be less-combined range sets for certain
1754      * groups of properties.
1755      * The relationship of which less-combined range set to use for which property
1756      * depends on the implementation of the properties and must be hardcoded
1757      * - somewhat error-prone and higher maintenance but can be tested easily
1758      * by building property sets "the simple way" in test code.
1759      *
1760      * ---
1761      *
1762      * Do not use a UnicodeSet pattern because that causes infinite recursion;
1763      * UnicodeSet depends on the inclusions set.
1764      *
1765      * ---
1766      *
1767      * getInclusions() is commented out starting 2005-feb-12 because
1768      * UnicodeSet now calls the uxyz_addPropertyStarts() directly,
1769      * and only for the relevant property source.
1770      */
1771     /*
1772     public UnicodeSet getInclusions() {
1773         UnicodeSet set = new UnicodeSet();
1774         NormalizerImpl.addPropertyStarts(set);
1775         addPropertyStarts(set);
1776         return set;
1777     }
1778     */
1779 }
1780