• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 import java.util.Iterator;
15 import java.util.MissingResourceException;
16 
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.lang.UCharacter.HangulSyllableType;
19 import com.ibm.icu.lang.UCharacter.NumericType;
20 import com.ibm.icu.lang.UCharacterCategory;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.lang.UScript;
23 import com.ibm.icu.text.Normalizer2;
24 import com.ibm.icu.text.UTF16;
25 import com.ibm.icu.text.UnicodeSet;
26 import com.ibm.icu.util.CodePointMap;
27 import com.ibm.icu.util.CodePointTrie;
28 import com.ibm.icu.util.ICUException;
29 import com.ibm.icu.util.ICUUncheckedIOException;
30 import com.ibm.icu.util.VersionInfo;
31 
32 /**
33 * <p>Internal class used for Unicode character property database.</p>
34 * <p>This class stores binary data read from uprops.icu.
35 * It does not have the capability to parse the data into more high-level
36 * information. It only returns bytes of information when required.</p>
37 * <p>Due to the form most commonly used for retrieval, array of char is used
38 * to store the binary data.</p>
39 * <p>UCharacterPropertyDB also contains information on accessing indexes to
40 * significant points in the binary data.</p>
41 * <p>Responsibility for molding the binary data into a more meaningful form lies with
42 * <a href=UCharacter.html>UCharacter</a>.</p>
43 * @author Syn Wee Quek
44 * @since release 2.1, february 1st 2002
45 */
46 
47 public final class UCharacterProperty
48 {
49     // public data members -----------------------------------------------
50 
51     /*
52      * public singleton instance
53      */
54     public static final UCharacterProperty INSTANCE;
55 
56     /**
57     * Trie data
58     */
59     public Trie2_16 m_trie_;
60     /**
61     * Unicode version
62     */
63     public VersionInfo m_unicodeVersion_;
64     /**
65     * Latin capital letter i with dot above
66     */
67     public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130;
68     /**
69     * Latin small letter i with dot above
70     */
71     public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131;
72     /**
73     * Latin lowercase i
74     */
75     public static final char LATIN_SMALL_LETTER_I_ = 0x69;
76     /**
77     * Character type mask
78     */
79     public static final int TYPE_MASK = 0x1F;
80 
81     // uprops.h enum UPropertySource --------------------------------------- ***
82 
83     /** No source, not a supported property. */
84     public static final int SRC_NONE=0;
85     /** From uchar.c/uprops.icu main trie */
86     public static final int SRC_CHAR=1;
87     /** From uchar.c/uprops.icu properties vectors trie */
88     public static final int SRC_PROPSVEC=2;
89     /** From unames.c/unames.icu */
90     public static final int SRC_NAMES=3;
91     /** From ucase.c/ucase.icu */
92     public static final int SRC_CASE=4;
93     /** From ubidi_props.c/ubidi.icu */
94     public static final int SRC_BIDI=5;
95     /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
96     public static final int SRC_CHAR_AND_PROPSVEC=6;
97     /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
98     public static final int SRC_CASE_AND_NORM=7;
99     /** From normalizer2impl.cpp/nfc.nrm */
100     public static final int SRC_NFC=8;
101     /** From normalizer2impl.cpp/nfkc.nrm */
102     public static final int SRC_NFKC=9;
103     /** From normalizer2impl.cpp/nfkc_cf.nrm */
104     public static final int SRC_NFKC_CF=10;
105     /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
106     public static final int SRC_NFC_CANON_ITER=11;
107     // Text layout properties.
108     public static final int SRC_INPC=12;
109     public static final int SRC_INSC=13;
110     public static final int SRC_VO=14;
111     /** One more than the highest UPropertySource (SRC_) constant. */
112     public static final int SRC_COUNT=15;
113 
114     private static final class LayoutProps {
115         private static final class IsAcceptable implements ICUBinary.Authenticate {
116             @Override
isDataVersionAcceptable(byte version[])117             public boolean isDataVersionAcceptable(byte version[]) {
118                 return version[0] == 1;
119             }
120         }
121         private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
122         private static final int DATA_FORMAT = 0x4c61796f;  // "Layo"
123 
124         // indexes into indexes[]
125         // Element 0 stores the length of the indexes[] array.
126         //ivate static final int IX_INDEXES_LENGTH = 0;
127         // Elements 1..7 store the tops of consecutive code point tries.
128         // No trie is stored if the difference between two of these is less than 16.
129         private static final int IX_INPC_TRIE_TOP = 1;
130         private static final int IX_INSC_TRIE_TOP = 2;
131         private static final int IX_VO_TRIE_TOP = 3;
132         //ivate static final int IX_RESERVED_TOP = 4;
133 
134         //ivate static final int IX_TRIES_TOP = 7;
135 
136         private static final int IX_MAX_VALUES = 9;
137 
138         // Length of indexes[]. Multiple of 4 to 16-align the tries.
139         //ivate static final int IX_COUNT = 12;
140 
141         private static final int MAX_INPC_SHIFT = 24;
142         private static final int MAX_INSC_SHIFT = 16;
143         private static final int MAX_VO_SHIFT = 8;
144 
145         static final LayoutProps INSTANCE = new LayoutProps();
146 
147         CodePointTrie inpcTrie = null;  // Indic_Positional_Category
148         CodePointTrie inscTrie = null;  // Indic_Syllabic_Category
149         CodePointTrie voTrie = null;  // Vertical_Orientation
150 
151         int maxInpcValue = 0;
152         int maxInscValue = 0;
153         int maxVoValue = 0;
154 
LayoutProps()155         LayoutProps() {
156             ByteBuffer bytes = ICUBinary.getRequiredData("ulayout.icu");
157             try {
158                 ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
159                 int startPos = bytes.position();
160                 int indexesLength = bytes.getInt();  // inIndexes[IX_INDEXES_LENGTH]
161                 if (indexesLength < 12) {
162                     throw new ICUUncheckedIOException(
163                             "Text layout properties data: not enough indexes");
164                 }
165                 int[] inIndexes = new int[indexesLength];
166                 inIndexes[0] = indexesLength;
167                 for (int i = 1; i < indexesLength; ++i) {
168                     inIndexes[i] = bytes.getInt();
169                 }
170 
171                 int offset = indexesLength * 4;
172                 int top = inIndexes[IX_INPC_TRIE_TOP];
173                 int trieSize = top - offset;
174                 if (trieSize >= 16) {
175                     inpcTrie = CodePointTrie.fromBinary(null, null, bytes);
176                 }
177                 int pos = bytes.position() - startPos;
178                 assert top >= pos;
179                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
180                 offset = top;
181                 top = inIndexes[IX_INSC_TRIE_TOP];
182                 trieSize = top - offset;
183                 if (trieSize >= 16) {
184                     inscTrie = CodePointTrie.fromBinary(null, null, bytes);
185                 }
186                 pos = bytes.position() - startPos;
187                 assert top >= pos;
188                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
189                 offset = top;
190                 top = inIndexes[IX_VO_TRIE_TOP];
191                 trieSize = top - offset;
192                 if (trieSize >= 16) {
193                     voTrie = CodePointTrie.fromBinary(null, null, bytes);
194                 }
195                 pos = bytes.position() - startPos;
196                 assert top >= pos;
197                 ICUBinary.skipBytes(bytes, top - pos);  // skip padding after trie bytes
198 
199                 int maxValues = inIndexes[IX_MAX_VALUES];
200                 maxInpcValue = maxValues >>> MAX_INPC_SHIFT;
201                 maxInscValue = (maxValues >> MAX_INSC_SHIFT) & 0xff;
202                 maxVoValue = (maxValues >> MAX_VO_SHIFT) & 0xff;
203             } catch(IOException e) {
204                 throw new ICUUncheckedIOException(e);
205             }
206         }
207 
addPropertyStarts(int src, UnicodeSet set)208         public UnicodeSet addPropertyStarts(int src, UnicodeSet set) {
209             CodePointTrie trie;
210             switch (src) {
211             case SRC_INPC:
212                 trie = inpcTrie;
213                 break;
214             case SRC_INSC:
215                 trie = inscTrie;
216                 break;
217             case SRC_VO:
218                 trie = voTrie;
219                 break;
220             default:
221                 throw new IllegalStateException();
222             }
223 
224             if (trie == null) {
225                 throw new MissingResourceException(
226                         "no data for one of the text layout properties; src=" + src,
227                         "LayoutProps", "");
228             }
229 
230             // Add the start code point of each same-value range of the trie.
231             CodePointMap.Range range = new CodePointMap.Range();
232             int start = 0;
233             while (trie.getRange(start, null, range)) {
234                 set.add(start);
235                 start = range.getEnd() + 1;
236             }
237             return set;
238         }
239     }
240 
241     // public methods ----------------------------------------------------
242 
243     /**
244     * Gets the main property value for code point ch.
245     * @param ch code point whose property value is to be retrieved
246     * @return property value of code point
247     */
getProperty(int ch)248     public final int getProperty(int ch)
249     {
250         return m_trie_.get(ch);
251     }
252 
253     /**
254      * Gets the unicode additional properties.
255      * Java version of C u_getUnicodeProperties().
256      * @param codepoint codepoint whose additional properties is to be
257      *                  retrieved
258      * @param column The column index.
259      * @return unicode properties
260      */
getAdditional(int codepoint, int column)261     public int getAdditional(int codepoint, int column) {
262         assert column >= 0;
263         if (column >= m_additionalColumnsCount_) {
264             return 0;
265         }
266         return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
267     }
268 
269     static final int MY_MASK = UCharacterProperty.TYPE_MASK
270         & ((1<<UCharacterCategory.UPPERCASE_LETTER) |
271             (1<<UCharacterCategory.LOWERCASE_LETTER) |
272             (1<<UCharacterCategory.TITLECASE_LETTER) |
273             (1<<UCharacterCategory.MODIFIER_LETTER) |
274             (1<<UCharacterCategory.OTHER_LETTER));
275 
276 
277        /**
278      * <p>Get the "age" of the code point.</p>
279      * <p>The "age" is the Unicode version when the code point was first
280      * designated (as a non-character or for Private Use) or assigned a
281      * character.</p>
282      * <p>This can be useful to avoid emitting code points to receiving
283      * processes that do not accept newer characters.</p>
284      * <p>The data is from the UCD file DerivedAge.txt.</p>
285      * <p>This API does not check the validity of the codepoint.</p>
286      * @param codepoint The code point.
287      * @return the Unicode version number
288      */
getAge(int codepoint)289     public VersionInfo getAge(int codepoint)
290     {
291         int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
292         return VersionInfo.getInstance(
293                            (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
294                            version & LAST_NIBBLE_MASK_, 0, 0);
295     }
296 
297     private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
298     private static final int GC_CC_MASK = getMask(UCharacter.CONTROL);
299     private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE);
300     private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR);
301     private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR);
302     private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR);
303     /** Mask constant for multiple UCharCategory bits (Z Separators). */
304     private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK;
305 
306     /**
307      * Checks if c is in
308      * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
309      * with space=\p{Whitespace} and Control=Cc.
310      * Implements UCHAR_POSIX_GRAPH.
311      * @internal
312      */
isgraphPOSIX(int c)313     private static final boolean isgraphPOSIX(int c) {
314         /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
315         /* comparing ==0 returns FALSE for the categories mentioned */
316         return (getMask(UCharacter.getType(c))&
317                 (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK))
318                ==0;
319     }
320 
321     // binary properties --------------------------------------------------- ***
322 
323     private class BinaryProperty {
324         int column;  // SRC_PROPSVEC column, or "source" if mask==0
325         int mask;
BinaryProperty(int column, int mask)326         BinaryProperty(int column, int mask) {
327             this.column=column;
328             this.mask=mask;
329         }
BinaryProperty(int source)330         BinaryProperty(int source) {
331             this.column=source;
332             this.mask=0;
333         }
getSource()334         final int getSource() {
335             return mask==0 ? column : SRC_PROPSVEC;
336         }
contains(int c)337         boolean contains(int c) {
338             // systematic, directly stored properties
339             return (getAdditional(c, column)&mask)!=0;
340         }
341     }
342 
343     private class CaseBinaryProperty extends BinaryProperty {  // case mapping properties
344         int which;
CaseBinaryProperty(int which)345         CaseBinaryProperty(int which) {
346             super(SRC_CASE);
347             this.which=which;
348         }
349         @Override
contains(int c)350         boolean contains(int c) {
351             return UCaseProps.INSTANCE.hasBinaryProperty(c, which);
352         }
353     }
354 
355     private class NormInertBinaryProperty extends BinaryProperty {  // UCHAR_NF*_INERT properties
356         int which;
NormInertBinaryProperty(int source, int which)357         NormInertBinaryProperty(int source, int which) {
358             super(source);
359             this.which=which;
360         }
361         @Override
contains(int c)362         boolean contains(int c) {
363             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c);
364         }
365     }
366 
367     BinaryProperty[] binProps={
368         /*
369          * Binary-property implementations must be in order of corresponding UProperty,
370          * and there must be exactly one entry per binary UProperty.
371          */
372         new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)),
373         new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)),
374         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_CONTROL
375             @Override
376             boolean contains(int c) {
377                 return UBiDiProps.INSTANCE.isBidiControl(c);
378             }
379         },
380         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_MIRRORED
381             @Override
382             boolean contains(int c) {
383                 return UBiDiProps.INSTANCE.isMirrored(c);
384             }
385         },
386         new BinaryProperty(1, (1<<DASH_PROPERTY_)),
387         new BinaryProperty(1, (1<<DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_)),
388         new BinaryProperty(1, (1<<DEPRECATED_PROPERTY_)),
389         new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)),
390         new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)),
391         new BinaryProperty(SRC_NFC) {  // UCHAR_FULL_COMPOSITION_EXCLUSION
392             @Override
393             boolean contains(int c) {
394                 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
395                 Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl;
396                 return impl.isCompNo(impl.getNorm16(c));
397             }
398         },
399         new BinaryProperty(1, (1<<GRAPHEME_BASE_PROPERTY_)),
400         new BinaryProperty(1, (1<<GRAPHEME_EXTEND_PROPERTY_)),
401         new BinaryProperty(1, (1<<GRAPHEME_LINK_PROPERTY_)),
402         new BinaryProperty(1, (1<<HEX_DIGIT_PROPERTY_)),
403         new BinaryProperty(1, (1<<HYPHEN_PROPERTY_)),
404         new BinaryProperty(1, (1<<ID_CONTINUE_PROPERTY_)),
405         new BinaryProperty(1, (1<<ID_START_PROPERTY_)),
406         new BinaryProperty(1, (1<<IDEOGRAPHIC_PROPERTY_)),
407         new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)),
408         new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)),
409         new BinaryProperty(SRC_BIDI) {  // UCHAR_JOIN_CONTROL
410             @Override
411             boolean contains(int c) {
412                 return UBiDiProps.INSTANCE.isJoinControl(c);
413             }
414         },
415         new BinaryProperty(1, (1<<LOGICAL_ORDER_EXCEPTION_PROPERTY_)),
416         new CaseBinaryProperty(UProperty.LOWERCASE),
417         new BinaryProperty(1, (1<<MATH_PROPERTY_)),
418         new BinaryProperty(1, (1<<NONCHARACTER_CODE_POINT_PROPERTY_)),
419         new BinaryProperty(1, (1<<QUOTATION_MARK_PROPERTY_)),
420         new BinaryProperty(1, (1<<RADICAL_PROPERTY_)),
421         new CaseBinaryProperty(UProperty.SOFT_DOTTED),
422         new BinaryProperty(1, (1<<TERMINAL_PUNCTUATION_PROPERTY_)),
423         new BinaryProperty(1, (1<<UNIFIED_IDEOGRAPH_PROPERTY_)),
424         new CaseBinaryProperty(UProperty.UPPERCASE),
425         new BinaryProperty(1, (1<<WHITE_SPACE_PROPERTY_)),
426         new BinaryProperty(1, (1<<XID_CONTINUE_PROPERTY_)),
427         new BinaryProperty(1, (1<<XID_START_PROPERTY_)),
428         new CaseBinaryProperty(UProperty.CASE_SENSITIVE),
429         new BinaryProperty(1, (1<<S_TERM_PROPERTY_)),
430         new BinaryProperty(1, (1<<VARIATION_SELECTOR_PROPERTY_)),
431         new NormInertBinaryProperty(SRC_NFC, UProperty.NFD_INERT),
432         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKD_INERT),
433         new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT),
434         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT),
435         new BinaryProperty(SRC_NFC_CANON_ITER) {  // UCHAR_SEGMENT_STARTER
436             @Override
437             boolean contains(int c) {
438                 return Norm2AllModes.getNFCInstance().impl.
439                     ensureCanonIterData().isCanonSegmentStarter(c);
440             }
441         },
442         new BinaryProperty(1, (1<<PATTERN_SYNTAX)),
443         new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)),
444         new BinaryProperty(SRC_CHAR_AND_PROPSVEC) {  // UCHAR_POSIX_ALNUM
445             @Override
446             boolean contains(int c) {
447                 return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c);
448             }
449         },
450         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_BLANK
451             @Override
452             boolean contains(int c) {
453                 // "horizontal space"
454                 if(c<=0x9f) {
455                     return c==9 || c==0x20; /* TAB or SPACE */
456                 } else {
457                     /* Zs */
458                     return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR;
459                 }
460             }
461         },
462         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_GRAPH
463             @Override
464             boolean contains(int c) {
465                 return isgraphPOSIX(c);
466             }
467         },
468         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_PRINT
469             @Override
470             boolean contains(int c) {
471                 /*
472                  * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
473                  *
474                  * The only cntrl character in graph+blank is TAB (in blank).
475                  * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
476                  */
477                 return (UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(c);
478             }
479         },
480         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_XDIGIT
481             @Override
482             boolean contains(int c) {
483                 /* check ASCII and Fullwidth ASCII a-fA-F */
484                 if(
485                     (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
486                     (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
487                 ) {
488                     return true;
489                 }
490                 return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER;
491             }
492         },
493         new CaseBinaryProperty(UProperty.CASED),
494         new CaseBinaryProperty(UProperty.CASE_IGNORABLE),
495         new CaseBinaryProperty(UProperty.CHANGES_WHEN_LOWERCASED),
496         new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED),
497         new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED),
498         new BinaryProperty(SRC_CASE_AND_NORM) {  // UCHAR_CHANGES_WHEN_CASEFOLDED
499             @Override
500             boolean contains(int c) {
501                 String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c);
502                 if(nfd!=null) {
503                     /* c has a decomposition */
504                     c=nfd.codePointAt(0);
505                     if(Character.charCount(c)!=nfd.length()) {
506                         /* multiple code points */
507                         c=-1;
508                     }
509                 } else if(c<0) {
510                     return false;  /* protect against bad input */
511                 }
512                 if(c>=0) {
513                     /* single code point */
514                     UCaseProps csp=UCaseProps.INSTANCE;
515                     UCaseProps.dummyStringBuilder.setLength(0);
516                     return csp.toFullFolding(c, UCaseProps.dummyStringBuilder,
517                                              UCharacter.FOLD_CASE_DEFAULT)>=0;
518                 } else {
519                     String folded=UCharacter.foldCase(nfd, true);
520                     return !folded.equals(nfd);
521                 }
522             }
523         },
524         new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED),
525         new BinaryProperty(SRC_NFKC_CF) {  // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
526             @Override
527             boolean contains(int c) {
528                 Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl;
529                 String src=UTF16.valueOf(c);
530                 StringBuilder dest=new StringBuilder();
531                 // Small destCapacity for NFKC_CF(c).
532                 Normalizer2Impl.ReorderingBuffer buffer=new Normalizer2Impl.ReorderingBuffer(kcf, dest, 5);
533                 kcf.compose(src, 0, src.length(), false, true, buffer);
534                 return !Normalizer2Impl.UTF16Plus.equal(dest, src);
535             }
536         },
537         new BinaryProperty(2, 1<<PROPS_2_EMOJI),
538         new BinaryProperty(2, 1<<PROPS_2_EMOJI_PRESENTATION),
539         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER),
540         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER_BASE),
541         new BinaryProperty(2, 1<<PROPS_2_EMOJI_COMPONENT),
542         new BinaryProperty(SRC_PROPSVEC) {  // REGIONAL_INDICATOR
543             // Property starts are a subset of lb=RI etc.
544             @Override
545             boolean contains(int c) {
546                 return 0x1F1E6<=c && c<=0x1F1FF;
547             }
548         },
549         new BinaryProperty(1, 1<<PREPENDED_CONCATENATION_MARK),
550         new BinaryProperty(2, 1<<PROPS_2_EXTENDED_PICTOGRAPHIC),
551     };
552 
hasBinaryProperty(int c, int which)553     public boolean hasBinaryProperty(int c, int which) {
554          if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) {
555             // not a known binary property
556             return false;
557         } else {
558             return binProps[which].contains(c);
559         }
560     }
561 
562     // int-value and enumerated properties --------------------------------- ***
563 
getType(int c)564     public int getType(int c) {
565         return getProperty(c)&TYPE_MASK;
566     }
567 
568     /*
569      * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
570      * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
571      */
572     private static final int /* UHangulSyllableType */ gcbToHst[]={
573         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
574         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
575         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
576         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
577         HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
578         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
579         HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
580         HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
581         HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
582         HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
583         /*
584          * Omit GCB values beyond what we need for hst.
585          * The code below checks for the array length.
586          */
587     };
588 
589     private class IntProperty {
590         int column;  // SRC_PROPSVEC column, or "source" if mask==0
591         int mask;
592         int shift;
IntProperty(int column, int mask, int shift)593         IntProperty(int column, int mask, int shift) {
594             this.column=column;
595             this.mask=mask;
596             this.shift=shift;
597         }
IntProperty(int source)598         IntProperty(int source) {
599             this.column=source;
600             this.mask=0;
601         }
getSource()602         final int getSource() {
603             return mask==0 ? column : SRC_PROPSVEC;
604         }
getValue(int c)605         int getValue(int c) {
606             // systematic, directly stored properties
607             return (getAdditional(c, column)&mask)>>>shift;
608         }
getMaxValue(int which)609         int getMaxValue(int which) {
610             return (getMaxValues(column)&mask)>>>shift;
611         }
612     }
613 
614     private class BiDiIntProperty extends IntProperty {
BiDiIntProperty()615         BiDiIntProperty() {
616             super(SRC_BIDI);
617         }
618         @Override
getMaxValue(int which)619         int getMaxValue(int which) {
620             return UBiDiProps.INSTANCE.getMaxValue(which);
621         }
622     }
623 
624     private class CombiningClassIntProperty extends IntProperty {
CombiningClassIntProperty(int source)625         CombiningClassIntProperty(int source) {
626             super(source);
627         }
628         @Override
getMaxValue(int which)629         int getMaxValue(int which) {
630             return 0xff;
631         }
632     }
633 
634     private class NormQuickCheckIntProperty extends IntProperty {  // UCHAR_NF*_QUICK_CHECK properties
635         int which;
636         int max;
NormQuickCheckIntProperty(int source, int which, int max)637         NormQuickCheckIntProperty(int source, int which, int max) {
638             super(source);
639             this.which=which;
640             this.max=max;
641         }
642         @Override
getValue(int c)643         int getValue(int c) {
644             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c);
645         }
646         @Override
getMaxValue(int which)647         int getMaxValue(int which) {
648             return max;
649         }
650     }
651 
652     IntProperty intProps[]={
653         new BiDiIntProperty() {  // BIDI_CLASS
654             @Override
655             int getValue(int c) {
656                 return UBiDiProps.INSTANCE.getClass(c);
657             }
658         },
659         new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_),
660         new CombiningClassIntProperty(SRC_NFC) {  // CANONICAL_COMBINING_CLASS
661             @Override
662             int getValue(int c) {
663                 return Normalizer2.getNFDInstance().getCombiningClass(c);
664             }
665         },
666         new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0),
667         new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_),
668         new IntProperty(SRC_CHAR) {  // GENERAL_CATEGORY
669             @Override
670             int getValue(int c) {
671                 return getType(c);
672             }
673             @Override
674             int getMaxValue(int which) {
675                 return UCharacterCategory.CHAR_CATEGORY_COUNT-1;
676             }
677         },
678         new BiDiIntProperty() {  // JOINING_GROUP
679             @Override
680             int getValue(int c) {
681                 return UBiDiProps.INSTANCE.getJoiningGroup(c);
682             }
683         },
684         new BiDiIntProperty() {  // JOINING_TYPE
685             @Override
686             int getValue(int c) {
687                 return UBiDiProps.INSTANCE.getJoiningType(c);
688             }
689         },
690         new IntProperty(2, LB_MASK, LB_SHIFT),  // LINE_BREAK
691         new IntProperty(SRC_CHAR) {  // NUMERIC_TYPE
692             @Override
693             int getValue(int c) {
694                 return ntvGetType(getNumericTypeValue(getProperty(c)));
695             }
696             @Override
697             int getMaxValue(int which) {
698                 return NumericType.COUNT-1;
699             }
700         },
701         new IntProperty(SRC_PROPSVEC) {
702             @Override
703             int getValue(int c) {
704                 return UScript.getScript(c);
705             }
706             @Override
707             int getMaxValue(int which) {
708                 int scriptX=getMaxValues(0)&SCRIPT_X_MASK;
709                 return mergeScriptCodeOrIndex(scriptX);
710             }
711         },
712         new IntProperty(SRC_PROPSVEC) {  // HANGUL_SYLLABLE_TYPE
713             @Override
714             int getValue(int c) {
715                 /* see comments on gcbToHst[] above */
716                 int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
717                 if(gcb<gcbToHst.length) {
718                     return gcbToHst[gcb];
719                 } else {
720                     return HangulSyllableType.NOT_APPLICABLE;
721                 }
722             }
723             @Override
724             int getMaxValue(int which) {
725                 return HangulSyllableType.COUNT-1;
726             }
727         },
728         // max=1=YES -- these are never "maybe", only "no" or "yes"
729         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFD_QUICK_CHECK, 1),
730         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKD_QUICK_CHECK, 1),
731         // max=2=MAYBE
732         new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2),
733         new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2),
734         new CombiningClassIntProperty(SRC_NFC) {  // LEAD_CANONICAL_COMBINING_CLASS
735             @Override
736             int getValue(int c) {
737                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8;
738             }
739         },
740         new CombiningClassIntProperty(SRC_NFC) {  // TRAIL_CANONICAL_COMBINING_CLASS
741             @Override
742             int getValue(int c) {
743                 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff;
744             }
745         },
746         new IntProperty(2, GCB_MASK, GCB_SHIFT),  // GRAPHEME_CLUSTER_BREAK
747         new IntProperty(2, SB_MASK, SB_SHIFT),  // SENTENCE_BREAK
748         new IntProperty(2, WB_MASK, WB_SHIFT),  // WORD_BREAK
749         new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
750             @Override
751             int getValue(int c) {
752                 return UBiDiProps.INSTANCE.getPairedBracketType(c);
753             }
754         },
755         new IntProperty(SRC_INPC) {
756             @Override
757             int getValue(int c) {
758                 CodePointTrie trie = LayoutProps.INSTANCE.inpcTrie;
759                 return trie != null ? trie.get(c) : 0;
760             }
761             @Override
762             int getMaxValue(int which) {
763                 return LayoutProps.INSTANCE.maxInpcValue;
764             }
765         },
766         new IntProperty(SRC_INSC) {
767             @Override
768             int getValue(int c) {
769                 CodePointTrie trie = LayoutProps.INSTANCE.inscTrie;
770                 return trie != null ? trie.get(c) : 0;
771             }
772             @Override
773             int getMaxValue(int which) {
774                 return LayoutProps.INSTANCE.maxInscValue;
775             }
776         },
777         new IntProperty(SRC_VO) {
778             @Override
779             int getValue(int c) {
780                 CodePointTrie trie = LayoutProps.INSTANCE.voTrie;
781                 return trie != null ? trie.get(c) : 0;
782             }
783             @Override
784             int getMaxValue(int which) {
785                 return LayoutProps.INSTANCE.maxVoValue;
786             }
787         },
788     };
789 
getIntPropertyValue(int c, int which)790     public int getIntPropertyValue(int c, int which) {
791         if(which<UProperty.INT_START) {
792             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
793                 return binProps[which].contains(c) ? 1 : 0;
794             }
795         } else if(which<UProperty.INT_LIMIT) {
796             return intProps[which-UProperty.INT_START].getValue(c);
797         } else if (which == UProperty.GENERAL_CATEGORY_MASK) {
798             return getMask(getType(c));
799         }
800         return 0; // undefined
801     }
802 
getIntPropertyMaxValue(int which)803     public int getIntPropertyMaxValue(int which) {
804         if(which<UProperty.INT_START) {
805             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
806                 return 1;  // maximum TRUE for all binary properties
807             }
808         } else if(which<UProperty.INT_LIMIT) {
809             return intProps[which-UProperty.INT_START].getMaxValue(which);
810         }
811         return -1; // undefined
812     }
813 
getSource(int which)814     final int getSource(int which) {
815         if(which<UProperty.BINARY_START) {
816             return SRC_NONE; /* undefined */
817         } else if(which<UProperty.BINARY_LIMIT) {
818             return binProps[which].getSource();
819         } else if(which<UProperty.INT_START) {
820             return SRC_NONE; /* undefined */
821         } else if(which<UProperty.INT_LIMIT) {
822             return intProps[which-UProperty.INT_START].getSource();
823         } else if(which<UProperty.STRING_START) {
824             switch(which) {
825             case UProperty.GENERAL_CATEGORY_MASK:
826             case UProperty.NUMERIC_VALUE:
827                 return SRC_CHAR;
828 
829             default:
830                 return SRC_NONE;
831             }
832         } else if(which<UProperty.STRING_LIMIT) {
833             switch(which) {
834             case UProperty.AGE:
835                 return SRC_PROPSVEC;
836 
837             case UProperty.BIDI_MIRRORING_GLYPH:
838                 return SRC_BIDI;
839 
840             case UProperty.CASE_FOLDING:
841             case UProperty.LOWERCASE_MAPPING:
842             case UProperty.SIMPLE_CASE_FOLDING:
843             case UProperty.SIMPLE_LOWERCASE_MAPPING:
844             case UProperty.SIMPLE_TITLECASE_MAPPING:
845             case UProperty.SIMPLE_UPPERCASE_MAPPING:
846             case UProperty.TITLECASE_MAPPING:
847             case UProperty.UPPERCASE_MAPPING:
848                 return SRC_CASE;
849 
850             case UProperty.ISO_COMMENT:
851             case UProperty.NAME:
852             case UProperty.UNICODE_1_NAME:
853                 return SRC_NAMES;
854 
855             default:
856                 return SRC_NONE;
857             }
858         } else {
859             switch(which) {
860             case UProperty.SCRIPT_EXTENSIONS:
861                 return SRC_PROPSVEC;
862             default:
863                 return SRC_NONE; /* undefined */
864             }
865         }
866     }
867 
868     /**
869      * <p>
870      * Unicode property names and property value names are compared
871      * "loosely". Property[Value]Aliases.txt say:
872      * <quote>
873      *   "With loose matching of property names, the case distinctions,
874      *    whitespace, and '_' are ignored."
875      * </quote>
876      * </p>
877      * <p>
878      * This function does just that, for ASCII (char *) name strings.
879      * It is almost identical to ucnv_compareNames() but also ignores
880      * ASCII White_Space characters (U+0009..U+000d).
881      * </p>
882      * @param name1 name to compare
883      * @param name2 name to compare
884      * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0
885      *         if name1 is greater than name2.
886      */
887     /* to be implemented in 2.4
888      * public static int comparePropertyNames(String name1, String name2)
889     {
890         int result = 0;
891         int i1 = 0;
892         int i2 = 0;
893         while (true) {
894             char ch1 = 0;
895             char ch2 = 0;
896             // Ignore delimiters '-', '_', and ASCII White_Space
897             if (i1 < name1.length()) {
898                 ch1 = name1.charAt(i1 ++);
899             }
900             while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t'
901                    || ch1 == '\n' // synwee what is || ch1 == '\v'
902                    || ch1 == '\f' || ch1=='\r') {
903                 if (i1 < name1.length()) {
904                     ch1 = name1.charAt(i1 ++);
905                 }
906                 else {
907                     ch1 = 0;
908                 }
909             }
910             if (i2 < name2.length()) {
911                 ch2 = name2.charAt(i2 ++);
912             }
913             while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t'
914                    || ch2 == '\n' // synwee what is || ch1 == '\v'
915                    || ch2 == '\f' || ch2=='\r') {
916                 if (i2 < name2.length()) {
917                     ch2 = name2.charAt(i2 ++);
918                 }
919                 else {
920                     ch2 = 0;
921                 }
922             }
923 
924             // If we reach the ends of both strings then they match
925             if (ch1 == 0 && ch2 == 0) {
926                 return 0;
927             }
928 
929             // Case-insensitive comparison
930             if (ch1 != ch2) {
931                 result = Character.toLowerCase(ch1)
932                                                 - Character.toLowerCase(ch2);
933                 if (result != 0) {
934                     return result;
935                 }
936             }
937         }
938     }
939     */
940 
941     /**
942      * Get the the maximum values for some enum/int properties.
943      * @return maximum values for the integer properties.
944      */
getMaxValues(int column)945     public int getMaxValues(int column)
946     {
947        // return m_maxBlockScriptValue_;
948 
949         switch(column) {
950         case 0:
951             return m_maxBlockScriptValue_;
952         case 2:
953             return m_maxJTGValue_;
954         default:
955             return 0;
956         }
957     }
958 
959     /**
960      * Gets the type mask
961      * @param type character type
962      * @return mask
963      */
getMask(int type)964     public static final int getMask(int type)
965     {
966         return 1 << type;
967     }
968 
969 
970     /**
971      * Returns the digit values of characters like 'A' - 'Z', normal,
972      * half-width and full-width. This method assumes that the other digit
973      * characters are checked by the calling method.
974      * @param ch character to test
975      * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
976      *         its corresponding digit will be returned.
977      */
getEuropeanDigit(int ch)978     public static int getEuropeanDigit(int ch) {
979         if ((ch > 0x7a && ch < 0xff21)
980             || ch < 0x41 || (ch > 0x5a && ch < 0x61)
981             || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
982             return -1;
983         }
984         if (ch <= 0x7a) {
985             // ch >= 0x41 or ch < 0x61
986             return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
987         }
988         // ch >= 0xff21
989         if (ch <= 0xff3a) {
990             return ch + 10 - 0xff21;
991         }
992         // ch >= 0xff41 && ch <= 0xff5a
993         return ch + 10 - 0xff41;
994     }
995 
digit(int c)996     public int digit(int c) {
997         int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
998         if(value<=9) {
999             return value;
1000         } else {
1001             return -1;
1002         }
1003     }
1004 
getNumericValue(int c)1005     public int getNumericValue(int c) {
1006         // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
1007         int ntv = getNumericTypeValue(getProperty(c));
1008 
1009         if(ntv==NTV_NONE_) {
1010             return getEuropeanDigit(c);
1011         } else if(ntv<NTV_DIGIT_START_) {
1012             /* decimal digit */
1013             return ntv-NTV_DECIMAL_START_;
1014         } else if(ntv<NTV_NUMERIC_START_) {
1015             /* other digit */
1016             return ntv-NTV_DIGIT_START_;
1017         } else if(ntv<NTV_FRACTION_START_) {
1018             /* small integer */
1019             return ntv-NTV_NUMERIC_START_;
1020         } else if(ntv<NTV_LARGE_START_) {
1021             /* fraction */
1022             return -2;
1023         } else if(ntv<NTV_BASE60_START_) {
1024             /* large, single-significant-digit integer */
1025             int mant=(ntv>>5)-14;
1026             int exp=(ntv&0x1f)+2;
1027             if(exp<9 || (exp==9 && mant<=2)) {
1028                 int numValue=mant;
1029                 do {
1030                     numValue*=10;
1031                 } while(--exp>0);
1032                 return numValue;
1033             } else {
1034                 return -2;
1035             }
1036         } else if(ntv<NTV_FRACTION20_START_) {
1037             /* sexagesimal (base 60) integer */
1038             int numValue=(ntv>>2)-0xbf;
1039             int exp=(ntv&3)+1;
1040 
1041             switch(exp) {
1042             case 4:
1043                 numValue*=60*60*60*60;
1044                 break;
1045             case 3:
1046                 numValue*=60*60*60;
1047                 break;
1048             case 2:
1049                 numValue*=60*60;
1050                 break;
1051             case 1:
1052                 numValue*=60;
1053                 break;
1054             case 0:
1055             default:
1056                 break;
1057             }
1058 
1059             return numValue;
1060         } else if(ntv<NTV_RESERVED_START_) {
1061             // fraction-20 e.g. 3/80
1062             return -2;
1063         } else {
1064             /* reserved */
1065             return -2;
1066         }
1067     }
1068 
getUnicodeNumericValue(int c)1069     public double getUnicodeNumericValue(int c) {
1070         // equivalent to c version double u_getNumericValue(UChar32 c)
1071         int ntv = getNumericTypeValue(getProperty(c));
1072 
1073         if(ntv==NTV_NONE_) {
1074             return UCharacter.NO_NUMERIC_VALUE;
1075         } else if(ntv<NTV_DIGIT_START_) {
1076             /* decimal digit */
1077             return ntv-NTV_DECIMAL_START_;
1078         } else if(ntv<NTV_NUMERIC_START_) {
1079             /* other digit */
1080             return ntv-NTV_DIGIT_START_;
1081         } else if(ntv<NTV_FRACTION_START_) {
1082             /* small integer */
1083             return ntv-NTV_NUMERIC_START_;
1084         } else if(ntv<NTV_LARGE_START_) {
1085             /* fraction */
1086             int numerator=(ntv>>4)-12;
1087             int denominator=(ntv&0xf)+1;
1088             return (double)numerator/denominator;
1089         } else if(ntv<NTV_BASE60_START_) {
1090             /* large, single-significant-digit integer */
1091             double numValue;
1092             int mant=(ntv>>5)-14;
1093             int exp=(ntv&0x1f)+2;
1094             numValue=mant;
1095 
1096             /* multiply by 10^exp without math.h */
1097             while(exp>=4) {
1098                 numValue*=10000.;
1099                 exp-=4;
1100             }
1101             switch(exp) {
1102             case 3:
1103                 numValue*=1000.;
1104                 break;
1105             case 2:
1106                 numValue*=100.;
1107                 break;
1108             case 1:
1109                 numValue*=10.;
1110                 break;
1111             case 0:
1112             default:
1113                 break;
1114             }
1115 
1116             return numValue;
1117         } else if(ntv<NTV_FRACTION20_START_) {
1118             /* sexagesimal (base 60) integer */
1119             int numValue=(ntv>>2)-0xbf;
1120             int exp=(ntv&3)+1;
1121 
1122             switch(exp) {
1123             case 4:
1124                 numValue*=60*60*60*60;
1125                 break;
1126             case 3:
1127                 numValue*=60*60*60;
1128                 break;
1129             case 2:
1130                 numValue*=60*60;
1131                 break;
1132             case 1:
1133                 numValue*=60;
1134                 break;
1135             case 0:
1136             default:
1137                 break;
1138             }
1139 
1140             return numValue;
1141         } else if(ntv<NTV_FRACTION32_START_) {
1142             // fraction-20 e.g. 3/80
1143             int frac20=ntv-NTV_FRACTION20_START_;  // 0..0x17
1144             int numerator=2*(frac20&3)+1;
1145             int denominator=20<<(frac20>>2);
1146             return (double)numerator/denominator;
1147         } else if(ntv<NTV_RESERVED_START_) {
1148             // fraction-32 e.g. 3/64
1149             int frac32=ntv-NTV_FRACTION32_START_;  // 0..15
1150             int numerator=2*(frac32&3)+1;
1151             int denominator=32<<(frac32>>2);
1152             return (double)numerator/denominator;
1153         } else {
1154             /* reserved */
1155             return UCharacter.NO_NUMERIC_VALUE;
1156         }
1157     }
1158 
1159     // protected variables -----------------------------------------------
1160 
1161     /**
1162      * Extra property trie
1163      */
1164     Trie2_16 m_additionalTrie_;
1165     /**
1166      * Extra property vectors, 1st column for age and second for binary
1167      * properties.
1168      */
1169     int m_additionalVectors_[];
1170     /**
1171      * Number of additional columns
1172      */
1173     int m_additionalColumnsCount_;
    /**
     * Maximum values for block and script, bits used as in vector word 0.
     * Returned by getMaxValues(0).
     */
1178     int m_maxBlockScriptValue_;
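    /**
     * Maximum values word returned by getMaxValues(2).
     * NOTE(review): presumably laid out like vector word 2 rather than
     * vector word 0 as this comment used to say -- confirm against the
     * uprops.icu data format.
     */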
1183      int m_maxJTGValue_;
1184 
1185     /**
1186      * Script_Extensions data
1187      */
1188     public char[] m_scriptExtensions_;
1189 
1190     // private variables -------------------------------------------------
1191 
1192     /**
1193     * Default name of the datafile
1194     */
1195     private static final String DATA_FILE_NAME_ = "uprops.icu";
1196 
1197     // property data constants -------------------------------------------------
1198 
1199     /**
1200      * Numeric types and values in the main properties words.
1201      */
1202     private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
getNumericTypeValue(int props)1203     private static final int getNumericTypeValue(int props) {
1204         return props >> NUMERIC_TYPE_VALUE_SHIFT_;
1205     }
1206     /* constants for the storage form of numeric types and values */
1207     /** No numeric value. */
1208     private static final int NTV_NONE_ = 0;
1209     /** Decimal digits: nv=0..9 */
1210     private static final int NTV_DECIMAL_START_ = 1;
1211     /** Other digits: nv=0..9 */
1212     private static final int NTV_DIGIT_START_ = 11;
1213     /** Small integers: nv=0..154 */
1214     private static final int NTV_NUMERIC_START_ = 21;
1215     /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
1216     private static final int NTV_FRACTION_START_ = 0xb0;
1217     /**
1218      * Large integers:
1219      * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
1220      * (only one significant decimal digit)
1221      */
1222     private static final int NTV_LARGE_START_ = 0x1e0;
1223     /**
1224      * Sexagesimal numbers:
1225      * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
1226      */
1227     private static final int NTV_BASE60_START_=0x300;
1228     /**
1229      * Fraction-20 values:
1230      * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
1231      * numerator: num = 2*(frac20&3)+1
1232      * denominator: den = 20<<(frac20>>2)
1233      */
1234     private static final int NTV_FRACTION20_START_ = NTV_BASE60_START_ + 36;  // 0x300+9*4=0x324
1235     /**
1236      * Fraction-32 values:
1237      * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
1238      * numerator: num = 2*(frac32&3)+1
1239      * denominator: den = 32<<(frac32>>2)
1240      */
1241     private static final int NTV_FRACTION32_START_ = NTV_FRACTION20_START_ + 24;  // 0x324+6*4=0x34c
1242     /** No numeric value (yet). */
1243     private static final int NTV_RESERVED_START_ = NTV_FRACTION32_START_ + 16;  // 0x34c+4*4=0x35c
1244 
ntvGetType(int ntv)1245     private static final int ntvGetType(int ntv) {
1246         return
1247             (ntv==NTV_NONE_) ? NumericType.NONE :
1248             (ntv<NTV_DIGIT_START_) ?  NumericType.DECIMAL :
1249             (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
1250             NumericType.NUMERIC;
1251     }
1252 
1253     /*
1254      * Properties in vector word 0
1255      * Bits
1256      * 31..24   DerivedAge version major/minor one nibble each
1257      * 23..22   3..1: Bits 21..20 & 7..0 = Script_Extensions index
1258      *             3: Script value from Script_Extensions
1259      *             2: Script=Inherited
1260      *             1: Script=Common
1261      *             0: Script=bits 21..20 & 7..0
1262      * 21..20   Bits 9..8 of the UScriptCode, or index to Script_Extensions
1263      * 19..17   East Asian Width
1264      * 16.. 8   UBlockCode
1265      *  7.. 0   UScriptCode, or index to Script_Extensions
1266      */
1267 
1268     /**
1269      * Script_Extensions: mask includes Script
1270      */
1271     public static final int SCRIPT_X_MASK = 0x00f000ff;
1272     //private static final int SCRIPT_X_SHIFT = 22;
1273 
1274     // The UScriptCode or Script_Extensions index is split across two bit fields.
1275     // (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
1276     // Shift the high bits right by 12 to assemble the full value.
1277     public static final int SCRIPT_HIGH_MASK = 0x00300000;
1278     public static final int SCRIPT_HIGH_SHIFT = 12;
1279     public static final int MAX_SCRIPT = 0x3ff;
1280 
1281     /**
1282      * Integer properties mask and shift values for East Asian cell width.
1283      * Equivalent to icu4c UPROPS_EA_MASK
1284      */
1285     private static final int EAST_ASIAN_MASK_ = 0x000e0000;
1286     /**
1287      * Integer properties mask and shift values for East Asian cell width.
1288      * Equivalent to icu4c UPROPS_EA_SHIFT
1289      */
1290     private static final int EAST_ASIAN_SHIFT_ = 17;
1291     /**
1292      * Integer properties mask and shift values for blocks.
1293      * Equivalent to icu4c UPROPS_BLOCK_MASK
1294      */
1295     private static final int BLOCK_MASK_ = 0x0001ff00;
1296     /**
1297      * Integer properties mask and shift values for blocks.
1298      * Equivalent to icu4c UPROPS_BLOCK_SHIFT
1299      */
1300     private static final int BLOCK_SHIFT_ = 8;
    /**
     * Integer properties mask for the low bits of the script code or
     * Script_Extensions index.
     * Equivalent to icu4c UPROPS_SCRIPT_LOW_MASK.
     */
1305     public static final int SCRIPT_LOW_MASK = 0x000000ff;
1306 
1307     /* SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
1308     public static final int SCRIPT_X_WITH_COMMON = 0x400000;
1309     public static final int SCRIPT_X_WITH_INHERITED = 0x800000;
1310     public static final int SCRIPT_X_WITH_OTHER = 0xc00000;
1311 
mergeScriptCodeOrIndex(int scriptX)1312     public static final int mergeScriptCodeOrIndex(int scriptX) {
1313         return
1314             ((scriptX & SCRIPT_HIGH_MASK) >> SCRIPT_HIGH_SHIFT) |
1315             (scriptX & SCRIPT_LOW_MASK);
1316     }
1317 
1318     /**
1319      * Additional properties used in internal trie data
1320      */
1321     /*
1322      * Properties in vector word 1
1323      * Each bit encodes one binary property.
1324      * The following constants represent the bit number, use 1<<UPROPS_XYZ.
1325      * UPROPS_BINARY_1_TOP<=32!
1326      *
1327      * Keep this list of property enums in sync with
1328      * propListNames[] in icu/source/tools/genprops/props2.c!
1329      *
1330      * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
1331      */
1332     private static final int WHITE_SPACE_PROPERTY_ = 0;
1333     private static final int DASH_PROPERTY_ = 1;
1334     private static final int HYPHEN_PROPERTY_ = 2;
1335     private static final int QUOTATION_MARK_PROPERTY_ = 3;
1336     private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
1337     private static final int MATH_PROPERTY_ = 5;
1338     private static final int HEX_DIGIT_PROPERTY_ = 6;
1339     private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
1340     private static final int ALPHABETIC_PROPERTY_ = 8;
1341     private static final int IDEOGRAPHIC_PROPERTY_ = 9;
1342     private static final int DIACRITIC_PROPERTY_ = 10;
1343     private static final int EXTENDER_PROPERTY_ = 11;
1344     private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
1345     private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
1346     private static final int GRAPHEME_LINK_PROPERTY_ = 14;
1347     private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
1348     private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
1349     private static final int RADICAL_PROPERTY_ = 17;
1350     private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
1351     private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
1352     private static final int DEPRECATED_PROPERTY_ = 20;
1353     private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
1354     private static final int XID_START_PROPERTY_ = 22;
1355     private static final int XID_CONTINUE_PROPERTY_ = 23;
1356     private static final int ID_START_PROPERTY_    = 24;
1357     private static final int ID_CONTINUE_PROPERTY_ = 25;
1358     private static final int GRAPHEME_BASE_PROPERTY_ = 26;
1359     private static final int S_TERM_PROPERTY_ = 27;
1360     private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
1361     private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
1362     private static final int PATTERN_WHITE_SPACE = 30;
1363     private static final int PREPENDED_CONCATENATION_MARK = 31;     // new in ICU 60 and Unicode 10
1364 
1365     /*
1366      * Properties in vector word 2
1367      * Bits
1368      * 31..26   http://www.unicode.org/reports/tr51/#Emoji_Properties
1369      * 25..20   Line Break
1370      * 19..15   Sentence Break
1371      * 14..10   Word Break
1372      *  9.. 5   Grapheme Cluster Break
1373      *  4.. 0   Decomposition Type
1374      */
1375     private static final int PROPS_2_EXTENDED_PICTOGRAPHIC=26;
1376     private static final int PROPS_2_EMOJI_COMPONENT = 27;
1377     private static final int PROPS_2_EMOJI = 28;
1378     private static final int PROPS_2_EMOJI_PRESENTATION = 29;
1379     private static final int PROPS_2_EMOJI_MODIFIER = 30;
1380     private static final int PROPS_2_EMOJI_MODIFIER_BASE = 31;
1381 
1382     private static final int LB_MASK          = 0x03f00000;
1383     private static final int LB_SHIFT         = 20;
1384 
1385     private static final int SB_MASK          = 0x000f8000;
1386     private static final int SB_SHIFT         = 15;
1387 
1388     private static final int WB_MASK          = 0x00007c00;
1389     private static final int WB_SHIFT         = 10;
1390 
1391     private static final int GCB_MASK         = 0x000003e0;
1392     private static final int GCB_SHIFT        = 5;
1393 
1394     /**
1395      * Integer properties mask for decomposition type.
1396      * Equivalent to icu4c UPROPS_DT_MASK.
1397      */
1398     private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
1399 
1400     /**
1401      * First nibble shift
1402      */
1403     private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
1404     /**
1405      * Second nibble mask
1406      */
1407     private static final int LAST_NIBBLE_MASK_ = 0xF;
1408     /**
1409      * Age value shift
1410      */
1411     private static final int AGE_SHIFT_ = 24;
1412 
1413 
1414     // private constructors --------------------------------------------------
1415 
1416     /**
1417      * Constructor
1418      * @exception IOException thrown when data reading fails or data corrupted
1419      */
UCharacterProperty()1420     private UCharacterProperty() throws IOException
1421     {
1422         // consistency check
1423         if(binProps.length!=UProperty.BINARY_LIMIT) {
1424             throw new ICUException("binProps.length!=UProperty.BINARY_LIMIT");
1425         }
1426         if(intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)) {
1427             throw new ICUException("intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)");
1428         }
1429 
1430         // jar access
1431         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
1432         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
1433         // Read or skip the 16 indexes.
1434         int propertyOffset = bytes.getInt();
1435         /* exceptionOffset = */ bytes.getInt();
1436         /* caseOffset = */ bytes.getInt();
1437         int additionalOffset = bytes.getInt();
1438         int additionalVectorsOffset = bytes.getInt();
1439         m_additionalColumnsCount_ = bytes.getInt();
1440         int scriptExtensionsOffset = bytes.getInt();
1441         int reservedOffset7 = bytes.getInt();
1442         /* reservedOffset8 = */ bytes.getInt();
1443         /* dataTopOffset = */ bytes.getInt();
1444         m_maxBlockScriptValue_ = bytes.getInt();
1445         m_maxJTGValue_ = bytes.getInt();
1446         ICUBinary.skipBytes(bytes, (16 - 12) << 2);
1447 
1448         // read the main properties trie
1449         m_trie_ = Trie2_16.createFromSerialized(bytes);
1450         int expectedTrieLength = (propertyOffset - 16) * 4;
1451         int trieLength = m_trie_.getSerializedLength();
1452         if(trieLength > expectedTrieLength) {
1453             throw new IOException("uprops.icu: not enough bytes for main trie");
1454         }
1455         // skip padding after trie bytes
1456         ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1457 
1458         // skip unused intervening data structures
1459         ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
1460 
1461         if(m_additionalColumnsCount_ > 0) {
1462             // reads the additional property block
1463             m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
1464             expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
1465             trieLength = m_additionalTrie_.getSerializedLength();
1466             if(trieLength > expectedTrieLength) {
1467                 throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
1468             }
1469             // skip padding after trie bytes
1470             ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
1471 
1472             // additional properties
1473             int size = scriptExtensionsOffset - additionalVectorsOffset;
1474             m_additionalVectors_ = ICUBinary.getInts(bytes, size, 0);
1475         }
1476 
1477         // Script_Extensions
1478         int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
1479         if(numChars > 0) {
1480             m_scriptExtensions_ = ICUBinary.getChars(bytes, numChars, 0);
1481         }
1482     }
1483 
1484     private static final class IsAcceptable implements ICUBinary.Authenticate {
1485         @Override
isDataVersionAcceptable(byte version[])1486         public boolean isDataVersionAcceptable(byte version[]) {
1487             return version[0] == 7;
1488         }
1489     }
1490     private static final int DATA_FORMAT = 0x5550726F;  // "UPro"
1491 
1492     // private methods -------------------------------------------------------
1493 
1494     /*
1495      * Compare additional properties to see if it has argument type
1496      * @param property 32 bit properties
1497      * @param type character type
1498      * @return true if property has type
1499      */
1500     /*private boolean compareAdditionalType(int property, int type)
1501     {
1502         return (property & (1 << type)) != 0;
1503     }*/
1504 
1505     // property starts for UnicodeSet -------------------------------------- ***
1506 
1507     private static final int TAB     = 0x0009;
1508     //private static final int LF      = 0x000a;
1509     //private static final int FF      = 0x000c;
1510     private static final int CR      = 0x000d;
1511     private static final int U_A     = 0x0041;
1512     private static final int U_F     = 0x0046;
1513     private static final int U_Z     = 0x005a;
1514     private static final int U_a     = 0x0061;
1515     private static final int U_f     = 0x0066;
1516     private static final int U_z     = 0x007a;
1517     private static final int DEL     = 0x007f;
1518     private static final int NL      = 0x0085;
1519     private static final int NBSP    = 0x00a0;
1520     private static final int CGJ     = 0x034f;
1521     private static final int FIGURESP= 0x2007;
1522     private static final int HAIRSP  = 0x200a;
1523     //private static final int ZWNJ    = 0x200c;
1524     //private static final int ZWJ     = 0x200d;
1525     private static final int RLM     = 0x200f;
1526     private static final int NNBSP   = 0x202f;
1527     private static final int WJ      = 0x2060;
1528     private static final int INHSWAP = 0x206a;
1529     private static final int NOMDIG  = 0x206f;
1530     private static final int U_FW_A  = 0xff21;
1531     private static final int U_FW_F  = 0xff26;
1532     private static final int U_FW_Z  = 0xff3a;
1533     private static final int U_FW_a  = 0xff41;
1534     private static final int U_FW_f  = 0xff46;
1535     private static final int U_FW_z  = 0xff5a;
1536     private static final int ZWNBSP  = 0xfeff;
1537 
addPropertyStarts(UnicodeSet set)1538     public UnicodeSet addPropertyStarts(UnicodeSet set) {
1539         /* add the start code point of each same-value range of the main trie */
1540         Iterator<Trie2.Range> trieIterator = m_trie_.iterator();
1541         Trie2.Range range;
1542         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1543             set.add(range.startCodePoint);
1544         }
1545 
1546         /* add code points with hardcoded properties, plus the ones following them */
1547 
1548         /* add for u_isblank() */
1549         set.add(TAB);
1550         set.add(TAB+1);
1551 
1552         /* add for IS_THAT_CONTROL_SPACE() */
1553         set.add(CR+1); /* range TAB..CR */
1554         set.add(0x1c);
1555         set.add(0x1f+1);
1556         set.add(NL);
1557         set.add(NL+1);
1558 
1559         /* add for u_isIDIgnorable() what was not added above */
1560         set.add(DEL); /* range DEL..NBSP-1, NBSP added below */
1561         set.add(HAIRSP);
1562         set.add(RLM+1);
1563         set.add(INHSWAP);
1564         set.add(NOMDIG+1);
1565         set.add(ZWNBSP);
1566         set.add(ZWNBSP+1);
1567 
1568         /* add no-break spaces for u_isWhitespace() what was not added above */
1569         set.add(NBSP);
1570         set.add(NBSP+1);
1571         set.add(FIGURESP);
1572         set.add(FIGURESP+1);
1573         set.add(NNBSP);
1574         set.add(NNBSP+1);
1575 
1576         /* add for u_charDigitValue() */
1577         // TODO remove when UCharacter.getHanNumericValue() is changed to just return
1578         // Unicode numeric values
1579         set.add(0x3007);
1580         set.add(0x3008);
1581         set.add(0x4e00);
1582         set.add(0x4e01);
1583         set.add(0x4e8c);
1584         set.add(0x4e8d);
1585         set.add(0x4e09);
1586         set.add(0x4e0a);
1587         set.add(0x56db);
1588         set.add(0x56dc);
1589         set.add(0x4e94);
1590         set.add(0x4e95);
1591         set.add(0x516d);
1592         set.add(0x516e);
1593         set.add(0x4e03);
1594         set.add(0x4e04);
1595         set.add(0x516b);
1596         set.add(0x516c);
1597         set.add(0x4e5d);
1598         set.add(0x4e5e);
1599 
1600         /* add for u_digit() */
1601         set.add(U_a);
1602         set.add(U_z+1);
1603         set.add(U_A);
1604         set.add(U_Z+1);
1605         set.add(U_FW_a);
1606         set.add(U_FW_z+1);
1607         set.add(U_FW_A);
1608         set.add(U_FW_Z+1);
1609 
1610         /* add for u_isxdigit() */
1611         set.add(U_f+1);
1612         set.add(U_F+1);
1613         set.add(U_FW_f+1);
1614         set.add(U_FW_F+1);
1615 
1616         /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
1617         set.add(WJ); /* range WJ..NOMDIG */
1618         set.add(0xfff0);
1619         set.add(0xfffb+1);
1620         set.add(0xe0000);
1621         set.add(0xe0fff+1);
1622 
1623         /* add for UCHAR_GRAPHEME_BASE and others */
1624         set.add(CGJ);
1625         set.add(CGJ+1);
1626 
1627         return set; // for chaining
1628     }
1629 
upropsvec_addPropertyStarts(UnicodeSet set)1630     public void upropsvec_addPropertyStarts(UnicodeSet set) {
1631         /* add the start code point of each same-value range of the properties vectors trie */
1632         if(m_additionalColumnsCount_>0) {
1633             /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
1634             Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
1635             Trie2.Range range;
1636             while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
1637                 set.add(range.startCodePoint);
1638             }
1639         }
1640     }
1641 
ulayout_addPropertyStarts(int src, UnicodeSet set)1642     static UnicodeSet ulayout_addPropertyStarts(int src, UnicodeSet set) {
1643         return LayoutProps.INSTANCE.addPropertyStarts(src, set);
1644     }
1645 
// This static initializer block must be placed after
// other static member initialization.
//
// Creates the singleton INSTANCE. An IOException from the constructor is
// rethrown as a MissingResourceException with the same message, and the
// IOException is attached as its cause so the original stack trace is kept.
static {
    try {
        INSTANCE = new UCharacterProperty();
    } catch (IOException e) {
        // MissingResourceException has no public constructor that takes a
        // cause, so attach it with initCause().
        MissingResourceException missing =
                new MissingResourceException(e.getMessage(), "", "");
        missing.initCause(e);
        throw missing;
    }
}
1656 
1657 /*----------------------------------------------------------------
1658  * Inclusions list
1659  *----------------------------------------------------------------*/
1660 
1661     /*
1662      * Return a set of characters for property enumeration.
1663      * The set implicitly contains 0x110000 as well, which is one more than the highest
1664      * Unicode code point.
1665      *
1666      * This set is used as an ordered list - its code points are ordered, and
1667      * consecutive code points (in Unicode code point order) in the set define a range.
1668      * For each two consecutive characters (start, limit) in the set,
1669      * all of the UCD/normalization and related properties for
1670      * all code points start..limit-1 are all the same,
1671      * except for character names and ISO comments.
1672      *
1673      * All Unicode code points U+0000..U+10ffff are covered by these ranges.
1674      * The ranges define a partition of the Unicode code space.
1675      * ICU uses the inclusions set to enumerate properties for generating
1676      * UnicodeSets containing all code points that have a certain property value.
1677      *
1678      * The Inclusion List is generated from the UCD. It is generated
1679      * by enumerating the data tries, and code points for hardcoded properties
1680      * are added as well.
1681      *
1682      * --------------------------------------------------------------------------
1683      *
1684      * The following are ideas for getting properties-unique code point ranges,
1685      * with possible optimizations beyond the current implementation.
1686      * These optimizations would require more code and be more fragile.
1687      * The current implementation generates one single list (set) for all properties.
1688      *
1689      * To enumerate properties efficiently, one needs to know ranges of
1690      * repetitive values, so that the value of only each start code point
1691      * can be applied to the whole range.
1692      * This information is in principle available in the uprops.icu/unorm.icu data.
1693      *
1694      * There are two obstacles:
1695      *
1696      * 1. Some properties are computed from multiple data structures,
1697      *    making it necessary to get repetitive ranges by intersecting
1698      *    ranges from multiple tries.
1699      *
1700      * 2. It is not economical to write code for getting repetitive ranges
1701      *    that are precise for each of some 50 properties.
1702      *
1703      * Compromise ideas:
1704      *
1705      * - Get ranges per trie, not per individual property.
1706      *   Each range contains the same values for a whole group of properties.
1707      *   This would generate currently five range sets, two for uprops.icu tries
1708      *   and three for unorm.icu tries.
1709      *
1710      * - Combine sets of ranges for multiple tries to get sufficient sets
1711      *   for properties, e.g., the uprops.icu main and auxiliary tries
1712      *   for all non-normalization properties.
1713      *
1714      * Ideas for representing ranges and combining them:
1715      *
1716      * - A UnicodeSet could hold just the start code points of ranges.
1717      *   Multiple sets are easily combined by or-ing them together.
1718      *
1719      * - Alternatively, a UnicodeSet could hold each even-numbered range.
1720      *   All ranges could be enumerated by using each start code point
1721      *   (for the even-numbered ranges) as well as each limit (end+1) code point
1722      *   (for the odd-numbered ranges).
1723      *   It should be possible to combine two such sets by xor-ing them,
1724      *   but no more than two.
1725      *
1726      * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
1727      * but the first one is certainly simpler and applicable for combining more than
1728      * two range sets.
1729      *
1730      * It is possible to combine all range sets for all uprops/unorm tries into one
1731      * set that can be used for all properties.
1732      * As an optimization, there could be less-combined range sets for certain
1733      * groups of properties.
1734      * The relationship of which less-combined range set to use for which property
1735      * depends on the implementation of the properties and must be hardcoded
1736      * - somewhat error-prone and higher maintenance but can be tested easily
1737      * by building property sets "the simple way" in test code.
1738      *
1739      * ---
1740      *
1741      * Do not use a UnicodeSet pattern because that causes infinite recursion;
1742      * UnicodeSet depends on the inclusions set.
1743      *
1744      * ---
1745      *
1746      * getInclusions() is commented out starting 2005-feb-12 because
1747      * UnicodeSet now calls the uxyz_addPropertyStarts() directly,
1748      * and only for the relevant property source.
1749      */
1750     /*
1751     public UnicodeSet getInclusions() {
1752         UnicodeSet set = new UnicodeSet();
1753         NormalizerImpl.addPropertyStarts(set);
1754         addPropertyStarts(set);
1755         return set;
1756     }
1757     */
1758 }
1759