• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  ******************************************************************************
6  * Copyright (C) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  ******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
import java.nio.ByteBuffer;
import java.util.Arrays;

import ohos.global.icu.text.UTF16;
16 
17 /**
18  * Trie implementation which stores data in char, 16 bits.
19  * @author synwee
20  * @see ohos.global.icu.impl.Trie
21  * @hide exposed on OHOS
22  */
23 
24  // note that i need to handle the block calculations later, since chartrie
25  // in icu4c uses the same index array.
26 public class CharTrie extends Trie
27 {
28     // public constructors ---------------------------------------------
29 
30     /**
31      * <p>Creates a new Trie with the settings for the trie data.</p>
32      * <p>Unserialize the 32-bit-aligned input buffer and use the data for the
33      * trie.</p>
34      * @param bytes data of an ICU data file, containing the trie
35      * @param dataManipulate object which provides methods to parse the char
36      *                        data
37      */
CharTrie(ByteBuffer bytes, DataManipulate dataManipulate)38     public CharTrie(ByteBuffer bytes, DataManipulate dataManipulate) {
39         super(bytes, dataManipulate);
40 
41         if (!isCharTrie()) {
42             throw new IllegalArgumentException(
43                                "Data given does not belong to a char trie.");
44         }
45     }
46 
47     /**
48      * Make a dummy CharTrie.
49      * A dummy trie is an empty runtime trie, used when a real data trie cannot
50      * be loaded.
51      *
52      * The trie always returns the initialValue,
53      * or the leadUnitValue for lead surrogate code points.
54      * The Latin-1 part is always set up to be linear.
55      *
56      * @param initialValue the initial value that is set for all code points
57      * @param leadUnitValue the value for lead surrogate code _units_ that do not
58      *                      have associated supplementary data
59      * @param dataManipulate object which provides methods to parse the char data
60      */
61     @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate)62     public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
63         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
64 
65         int dataLength, latin1Length, i, limit;
66         char block;
67 
68         /* calculate the actual size of the dummy trie data */
69 
70         /* max(Latin-1, block 0) */
71         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
72         if(leadUnitValue!=initialValue) {
73             dataLength+=DATA_BLOCK_LENGTH;
74         }
75         m_data_=new char[dataLength];
76         m_dataLength_=dataLength;
77 
78         m_initialValue_=(char)initialValue;
79 
80         /* fill the index and data arrays */
81 
82         /* indexes are preset to 0 (block 0) */
83 
84         /* Latin-1 data */
85         for(i=0; i<latin1Length; ++i) {
86             m_data_[i]=(char)initialValue;
87         }
88 
89         if(leadUnitValue!=initialValue) {
90             /* indexes for lead surrogate code units to the block after Latin-1 */
91             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
92             i=0xd800>>INDEX_STAGE_1_SHIFT_;
93             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
94             for(; i<limit; ++i) {
95                 m_index_[i]=block;
96             }
97 
98             /* data for lead surrogate code units */
99             limit=latin1Length+DATA_BLOCK_LENGTH;
100             for(i=latin1Length; i<limit; ++i) {
101                 m_data_[i]=(char)leadUnitValue;
102             }
103         }
104     }
105 
106     // public methods --------------------------------------------------
107 
108     /**
109     * Gets the value associated with the codepoint.
110     * If no value is associated with the codepoint, a default value will be
111     * returned.
112     * @param ch codepoint
113     * @return offset to data
114     */
getCodePointValue(int ch)115     public final char getCodePointValue(int ch)
116     {
117         int offset;
118 
119         // fastpath for U+0000..U+D7FF
120         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
121             // copy of getRawOffset()
122             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
123                     + (ch & INDEX_STAGE_3_MASK_);
124             return m_data_[offset];
125         }
126 
127         // handle U+D800..U+10FFFF
128         offset = getCodePointOffset(ch);
129 
130         // return -1 if there is an error, in this case we return the default
131         // value: m_initialValue_
132         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
133     }
134 
135     /**
136     * Gets the value to the data which this lead surrogate character points
137     * to.
138     * Returned data may contain folding offset information for the next
139     * trailing surrogate character.
140     * This method does not guarantee correct results for trail surrogates.
141     * @param ch lead surrogate character
142     * @return data value
143     */
getLeadValue(char ch)144     public final char getLeadValue(char ch)
145     {
146        return m_data_[getLeadOffset(ch)];
147     }
148 
149     /**
150     * Get the value associated with the BMP code point.
151     * Lead surrogate code points are treated as normal code points, with
152     * unfolded values that may differ from getLeadValue() results.
153     * @param ch the input BMP code point
154     * @return trie data value associated with the BMP codepoint
155     */
getBMPValue(char ch)156     public final char getBMPValue(char ch)
157     {
158         return m_data_[getBMPOffset(ch)];
159     }
160 
161     /**
162     * Get the value associated with a pair of surrogates.
163     * @param lead a lead surrogate
164     * @param trail a trail surrogate
165     */
getSurrogateValue(char lead, char trail)166     public final char getSurrogateValue(char lead, char trail)
167     {
168         int offset = getSurrogateOffset(lead, trail);
169         if (offset > 0) {
170             return m_data_[offset];
171         }
172         return m_initialValue_;
173     }
174 
175     /**
176     * <p>Get a value from a folding offset (from the value of a lead surrogate)
177     * and a trail surrogate.</p>
178     * <p>If the
179     * @param leadvalue value associated with the lead surrogate which contains
180     *        the folding offset
181     * @param trail surrogate
182     * @return trie data value associated with the trail character
183     */
getTrailValue(int leadvalue, char trail)184     public final char getTrailValue(int leadvalue, char trail)
185     {
186         if (m_dataManipulate_ == null) {
187             throw new NullPointerException(
188                              "The field DataManipulate in this Trie is null");
189         }
190         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
191         if (offset > 0) {
192             return m_data_[getRawOffset(offset,
193                                         (char)(trail & SURROGATE_MASK_))];
194         }
195         return m_initialValue_;
196     }
197 
198     /**
199      * <p>Gets the latin 1 fast path value.</p>
200      * <p>Note this only works if latin 1 characters have their own linear
201      * array.</p>
202      * @param ch latin 1 characters
203      * @return value associated with latin character
204      */
getLatin1LinearValue(char ch)205     public final char getLatin1LinearValue(char ch)
206     {
207         return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
208     }
209 
210     /**
211      * Checks if the argument Trie has the same data as this Trie
212      * @param other Trie to check
213      * @return true if the argument Trie has the same data as this Trie, false
214      *         otherwise
215      */
216     ///CLOVER:OFF
217     @Override
equals(Object other)218     public boolean equals(Object other)
219     {
220         boolean result = super.equals(other);
221         if (result && other instanceof CharTrie) {
222             CharTrie othertrie = (CharTrie)other;
223             return m_initialValue_ == othertrie.m_initialValue_;
224         }
225         return false;
226     }
227 
228     @Override
hashCode()229     public int hashCode() {
230         assert false : "hashCode not designed";
231         return 42;
232     }
233     ///CLOVER:ON
234 
235     // protected methods -----------------------------------------------
236 
237     /**
238      * <p>Parses the byte buffer and stores its trie content into a index and
239      * data array</p>
240      * @param bytes buffer containing trie data
241      */
242     @Override
unserialize(ByteBuffer bytes)243     protected final void unserialize(ByteBuffer bytes)
244     {
245         int indexDataLength = m_dataOffset_ + m_dataLength_;
246         m_index_ = ICUBinary.getChars(bytes, indexDataLength, 0);
247         m_data_           = m_index_;
248         m_initialValue_   = m_data_[m_dataOffset_];
249     }
250 
251     /**
252     * Gets the offset to the data which the surrogate pair points to.
253     * @param lead lead surrogate
254     * @param trail trailing surrogate
255     * @return offset to data
256     */
257     @Override
getSurrogateOffset(char lead, char trail)258     protected final int getSurrogateOffset(char lead, char trail)
259     {
260         if (m_dataManipulate_ == null) {
261             throw new NullPointerException(
262                              "The field DataManipulate in this Trie is null");
263         }
264 
265         // get fold position for the next trail surrogate
266         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
267 
268         // get the real data from the folded lead/trail units
269         if (offset > 0) {
270             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
271         }
272 
273         // return -1 if there is an error, in this case we return the default
274         // value: m_initialValue_
275         return -1;
276     }
277 
278     /**
279     * Gets the value at the argument index.
280     * For use internally in TrieIterator.
281     * @param index value at index will be retrieved
282     * @return 32 bit value
283     * @see ohos.global.icu.impl.TrieIterator
284     */
285     @Override
getValue(int index)286     protected final int getValue(int index)
287     {
288         return m_data_[index];
289     }
290 
291     /**
292     * Gets the default initial value
293     * @return 32 bit value
294     */
295     @Override
getInitialValue()296     protected final int getInitialValue()
297     {
298         return m_initialValue_;
299     }
300 
301     // private data members --------------------------------------------
302 
303     /**
304     * Default value
305     */
306     private char m_initialValue_;
307     /**
308     * Array of char data
309     */
310     private char m_data_[];
311 }
312