• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 ******************************************************************************
6 * Copyright (C) 1996-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 ******************************************************************************
9 */
10 
11 package ohos.global.icu.impl;
12 
13 import java.util.NoSuchElementException;
14 
15 import ohos.global.icu.lang.UCharacter;
16 import ohos.global.icu.text.UTF16;
17 import ohos.global.icu.util.RangeValueIterator;
18 
19 /**
20  * <p>Class enabling iteration of the values in a Trie.</p>
21  *
22  * <p>2015-sep-03 TODO: Only used in test code, move there.
23  *
24  * <p>Result of each iteration contains the interval of codepoints that have
25  * the same value type and the value type itself.</p>
26  * <p>The comparison of each codepoint value is done via extract(), which the
27  * default implementation is to return the value as it is.</p>
28  * <p>Method extract() can be overwritten to perform manipulations on
29  * codepoint values in order to perform specialized comparison.</p>
30  * <p>TrieIterator is designed to be a generic iterator for the CharTrie
31  * and the IntTrie, hence to accommodate both types of data, the return
32  * result will be in terms of int (32 bit) values.</p>
33  * <p>See ohos.global.icu.text.UCharacterTypeIterator for examples of use.</p>
34  * <p>Notes for porting utrie_enum from icu4c to icu4j:<br>
35  * Internally, icu4c's utrie_enum performs all iterations in its body. In Java
36  * sense, the caller will have to pass a object with a callback function
37  * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
38  * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
39  * codepoints with the same value as determined by
40  * UTrieEnumValue(const void *context, uint32_t value). for each range,
41  * utrie_enum calls the callback function to perform a task. In this way,
42  * icu4c performs the iteration within utrie_enum.
43  * To follow the JDK model, icu4j is slightly different from icu4c.
44  * Instead of requesting the caller to implement an object for a callback.
45  * The caller will have to implement a subclass of TrieIterator, fleshing out
46  * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
47  * the caller will have to code his own iteration and flesh out the task
48  * (equivalent to UTrieEnumRange) to be performed in the iteration loop.
49  * </p>
50  * <p>There are basically 3 usage scenarios for porting:</p>
51  * <p>1) UTrieEnumValue is the only implemented callback then just implement a
52  * subclass of TrieIterator and override the extract(int) method. The
53  * extract(int) method is analogus to UTrieEnumValue callback.
54  * </p>
55  * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
56  * a subclass of TrieIterator, override the extract method and iterate, e.g
57  * </p>
58  * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
59  *               set);<br>
60  * In Java :<br>
61  * <pre>
62  * class TrieIteratorImpl extends TrieIterator{
63  *     public TrieIteratorImpl(Trie data){
64  *         super(data);
65  *     }
66  *     public int extract(int value){
67  *         // port the implementation of _enumPropertyStartsValue here
68  *     }
69  * }
70  * ....
71  * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
72  * while(fcdIter.next(result)) {
73  *     // port the implementation of _enumPropertyStartsRange
74  * }
75  * </pre>
76  * </p>
77  * <p>3) UTrieEnumRange is the only implemented callback then just implement
78  * the while loop, when utrie_enum is called
79  * <pre>
80  * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
81  * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
82  * while(fcdIter.next(result)){
83  *     set.add(result.start);
84  * }
85  * </p>
86  * @author synwee
87  * @see ohos.global.icu.impl.Trie
88  * @hide exposed on OHOS
89  */
90 public class TrieIterator implements RangeValueIterator
91 
92 {
93     // public constructor ---------------------------------------------
94 
95     /**
96     * TrieEnumeration constructor
97     * @param trie to be used
98     * @exception IllegalArgumentException throw when argument is null.
99     */
TrieIterator(Trie trie)100     public TrieIterator(Trie trie)
101     {
102         if (trie == null) {
103             throw new IllegalArgumentException(
104                                           "Argument trie cannot be null");
105         }
106         m_trie_             = trie;
107         // synwee: check that extract belongs to the child class
108         m_initialValue_     = extract(m_trie_.getInitialValue());
109         reset();
110     }
111 
112     // public methods -------------------------------------------------
113 
114     /**
115     * <p>Returns true if we are not at the end of the iteration, false
116     * otherwise.</p>
117     * <p>The next set of codepoints with the same value type will be
118     * calculated during this call and returned in the arguement element.</p>
119     * @param element return result
120     * @return true if we are not at the end of the iteration, false otherwise.
121     * @exception NoSuchElementException - if no more elements exist.
122     * @see ohos.global.icu.util.RangeValueIterator.Element
123     */
124     @Override
next(Element element)125     public final boolean next(Element element)
126     {
127         if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
128             return false;
129         }
130         if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE &&
131             calculateNextBMPElement(element)) {
132             return true;
133         }
134         calculateNextSupplementaryElement(element);
135         return true;
136     }
137 
138     /**
139     * Resets the iterator to the beginning of the iteration
140     */
141     @Override
reset()142     public final void reset()
143     {
144         m_currentCodepoint_ = 0;
145         m_nextCodepoint_    = 0;
146         m_nextIndex_        = 0;
147         m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_;
148         if (m_nextBlock_ == m_trie_.m_dataOffset_) {
149             m_nextValue_ = m_initialValue_;
150         }
151         else {
152             m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_));
153         }
154         m_nextBlockIndex_ = 0;
155         m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
156     }
157 
158     // protected methods ----------------------------------------------
159 
160     /**
161     * Called by next() to extracts a 32 bit value from a trie value
162     * used for comparison.
163     * This method is to be overwritten if special manipulation is to be done
164     * to retrieve a relevant comparison.
165     * The default function is to return the value as it is.
166     * @param value a value from the trie
167     * @return extracted value
168     */
extract(int value)169     protected int extract(int value)
170     {
171         return value;
172     }
173 
174     // private methods ------------------------------------------------
175 
176     /**
177     * Set the result values
178     * @param element return result object
179     * @param start codepoint of range
180     * @param limit (end + 1) codepoint of range
181     * @param value common value of range
182     */
setResult(Element element, int start, int limit, int value)183     private final void setResult(Element element, int start, int limit,
184                                  int value)
185     {
186         element.start = start;
187         element.limit = limit;
188         element.value = value;
189     }
190 
191     /**
192     * Finding the next element.
193     * This method is called just before returning the result of
194     * next().
195     * We always store the next element before it is requested.
196     * In the case that we have to continue calculations into the
197     * supplementary planes, a false will be returned.
198     * @param element return result object
199     * @return true if the next range is found, false if we have to proceed to
200     *         the supplementary range.
201     */
calculateNextBMPElement(Element element)202     private final boolean calculateNextBMPElement(Element element)
203     {
204         int currentValue    = m_nextValue_;
205         m_currentCodepoint_ = m_nextCodepoint_;
206         m_nextCodepoint_ ++;
207         m_nextBlockIndex_ ++;
208         if (!checkBlockDetail(currentValue)) {
209             setResult(element, m_currentCodepoint_, m_nextCodepoint_,
210                       currentValue);
211             return true;
212         }
213         // synwee check that next block index == 0 here
214         // enumerate BMP - the main loop enumerates data blocks
215         while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) {
216             // because of the way the character is split to form the index
217             // the lead surrogate and trail surrogate can not be in the
218             // mid of a block
219             if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) {
220                 // skip lead surrogate code units,
221                 // go to lead surrogate codepoints
222                 m_nextIndex_ = BMP_INDEX_LENGTH_;
223             }
224             else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) {
225                 // go back to regular BMP code points
226                 m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_;
227             } else {
228                 m_nextIndex_ ++;
229             }
230 
231             m_nextBlockIndex_ = 0;
232             if (!checkBlock(currentValue)) {
233                 setResult(element, m_currentCodepoint_, m_nextCodepoint_,
234                           currentValue);
235                 return true;
236             }
237         }
238         m_nextCodepoint_ --;   // step one back since this value has not been
239         m_nextBlockIndex_ --;  // retrieved yet.
240         return false;
241     }
242 
243     /**
244     * Finds the next supplementary element.
245     * For each entry in the trie, the value to be delivered is passed through
246     * extract().
247     * We always store the next element before it is requested.
248     * Called after calculateNextBMP() completes its round of BMP characters.
249     * There is a slight difference in the usage of m_currentCodepoint_
250     * here as compared to calculateNextBMP(). Though both represents the
251     * lower bound of the next element, in calculateNextBMP() it gets set
252     * at the start of any loop, where-else, in calculateNextSupplementary()
253     * since m_currentCodepoint_ already contains the lower bound of the
254     * next element (passed down from calculateNextBMP()), we keep it till
255     * the end before resetting it to the new value.
256     * Note, if there are no more iterations, it will never get to here.
257     * Blocked out by next().
258     * @param element return result object
259     */
calculateNextSupplementaryElement(Element element)260     private final void calculateNextSupplementaryElement(Element element)
261     {
262         int currentValue = m_nextValue_;
263         m_nextCodepoint_ ++;
264         m_nextBlockIndex_ ++;
265 
266         if (UTF16.getTrailSurrogate(m_nextCodepoint_)
267                                         != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
268             // this piece is only called when we are in the middle of a lead
269             // surrogate block
270             if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
271                 setResult(element, m_currentCodepoint_, m_nextCodepoint_,
272                           currentValue);
273                 m_currentCodepoint_ = m_nextCodepoint_;
274                 return;
275             }
276             // we have cleared one block
277             m_nextIndex_ ++;
278             m_nextTrailIndexOffset_ ++;
279             if (!checkTrailBlock(currentValue)) {
280                 setResult(element, m_currentCodepoint_, m_nextCodepoint_,
281                           currentValue);
282                 m_currentCodepoint_ = m_nextCodepoint_;
283                 return;
284             }
285         }
286         int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
287         // enumerate supplementary code points
288         while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
289             // lead surrogate access
290             final int leadBlock =
291                    m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
292                                                    Trie.INDEX_STAGE_2_SHIFT_;
293             if (leadBlock == m_trie_.m_dataOffset_) {
294                 // no entries for a whole block of lead surrogates
295                 if (currentValue != m_initialValue_) {
296                     m_nextValue_      = m_initialValue_;
297                     m_nextBlock_      = leadBlock;  // == m_trie_.m_dataOffset_
298                     m_nextBlockIndex_ = 0;
299                     setResult(element, m_currentCodepoint_, m_nextCodepoint_,
300                               currentValue);
301                     m_currentCodepoint_ = m_nextCodepoint_;
302                     return;
303                 }
304 
305                 nextLead += DATA_BLOCK_LENGTH_;
306                 // number of total affected supplementary codepoints in one
307                 // block
308                 // this is not a simple addition of
309                 // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
310                 // that we might have moved some of the codepoints
311                 m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
312                 continue;
313             }
314             if (m_trie_.m_dataManipulate_ == null) {
315                 throw new NullPointerException(
316                             "The field DataManipulate in this Trie is null");
317             }
318             // enumerate trail surrogates for this lead surrogate
319             m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
320                                m_trie_.getValue(leadBlock +
321                                    (nextLead & Trie.INDEX_STAGE_3_MASK_)));
322             if (m_nextIndex_ <= 0) {
323                 // no data for this lead surrogate
324                 if (currentValue != m_initialValue_) {
325                     m_nextValue_      = m_initialValue_;
326                     m_nextBlock_      = m_trie_.m_dataOffset_;
327                     m_nextBlockIndex_ = 0;
328                     setResult(element, m_currentCodepoint_, m_nextCodepoint_,
329                               currentValue);
330                     m_currentCodepoint_ = m_nextCodepoint_;
331                     return;
332                 }
333                 m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
334             } else {
335                 m_nextTrailIndexOffset_ = 0;
336                 if (!checkTrailBlock(currentValue)) {
337                     setResult(element, m_currentCodepoint_, m_nextCodepoint_,
338                               currentValue);
339                     m_currentCodepoint_ = m_nextCodepoint_;
340                     return;
341                 }
342             }
343             nextLead ++;
344          }
345 
346          // deliver last range
347          setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
348                    currentValue);
349     }
350 
351     /**
352     * Internal block value calculations
353     * Performs calculations on a data block to find codepoints in m_nextBlock_
354     * after the index m_nextBlockIndex_ that has the same value.
355     * Note m_*_ variables at this point is the next codepoint whose value
356     * has not been calculated.
357     * But when returned with false, it will be the last codepoint whose
358     * value has been calculated.
359     * @param currentValue the value which other codepoints are tested against
360     * @return true if the whole block has the same value as currentValue or if
361     *              the whole block has been calculated, false otherwise.
362     */
checkBlockDetail(int currentValue)363     private final boolean checkBlockDetail(int currentValue)
364     {
365         while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) {
366             m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ +
367                                                     m_nextBlockIndex_));
368             if (m_nextValue_ != currentValue) {
369                 return false;
370             }
371             ++ m_nextBlockIndex_;
372             ++ m_nextCodepoint_;
373         }
374         return true;
375     }
376 
377     /**
378     * Internal block value calculations
379     * Performs calculations on a data block to find codepoints in m_nextBlock_
380     * that has the same value.
381     * Will call checkBlockDetail() if highlevel check fails.
382     * Note m_*_ variables at this point is the next codepoint whose value
383     * has not been calculated.
384     * @param currentBlock the initial block containing all currentValue
385     * @param currentValue the value which other codepoints are tested against
386     * @return true if the whole block has the same value as currentValue or if
387     *              the whole block has been calculated, false otherwise.
388     */
checkBlock(int currentValue)389     private final boolean checkBlock(int currentValue)
390     {
391         int currentBlock = m_nextBlock_;
392         m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
393                                                   Trie.INDEX_STAGE_2_SHIFT_;
394         if (m_nextBlock_ == currentBlock &&
395             (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) {
396             // the block is the same as the previous one, filled with
397             // currentValue
398             m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
399         }
400         else if (m_nextBlock_ == m_trie_.m_dataOffset_) {
401             // this is the all-initial-value block
402             if (currentValue != m_initialValue_) {
403                 m_nextValue_      = m_initialValue_;
404                 m_nextBlockIndex_ = 0;
405                 return false;
406             }
407             m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
408         }
409         else {
410             if (!checkBlockDetail(currentValue)) {
411                 return false;
412             }
413         }
414         return true;
415     }
416 
417     /**
418     * Internal block value calculations
419     * Performs calculations on multiple data blocks for a set of trail
420     * surrogates to find codepoints in m_nextBlock_ that has the same value.
421     * Will call checkBlock() for internal block checks.
422     * Note m_*_ variables at this point is the next codepoint whose value
423     * has not been calculated.
424     * @param currentValue the value which other codepoints are tested against
425     * @return true if the whole block has the same value as currentValue or if
426     *              the whole block has been calculated, false otherwise.
427     */
checkTrailBlock(int currentValue)428     private final boolean checkTrailBlock(int currentValue)
429     {
430         // enumerate code points for this lead surrogate
431         while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_)
432         {
433             // if we ever reach here, we are at the start of a new block
434             m_nextBlockIndex_ = 0;
435             // copy of most of the body of the BMP loop
436             if (!checkBlock(currentValue)) {
437                 return false;
438             }
439             m_nextTrailIndexOffset_ ++;
440             m_nextIndex_ ++;
441         }
442         return true;
443     }
444 
445     /**
446     * Checks if we are beginning at the start of a initial block.
447     * If we are then the rest of the codepoints in this initial block
448     * has the same values.
449     * We increment m_nextCodepoint_ and relevant data members if so.
450     * This is used only in for the supplementary codepoints because
451     * the offset to the trail indexes could be 0.
452     * @return true if we are at the start of a initial block.
453     */
checkNullNextTrailIndex()454     private final boolean checkNullNextTrailIndex()
455     {
456         if (m_nextIndex_ <= 0) {
457             m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
458             int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
459             int leadBlock =
460                    m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
461                                                    Trie.INDEX_STAGE_2_SHIFT_;
462             if (m_trie_.m_dataManipulate_ == null) {
463                 throw new NullPointerException(
464                             "The field DataManipulate in this Trie is null");
465             }
466             m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
467                                m_trie_.getValue(leadBlock +
468                                    (nextLead & Trie.INDEX_STAGE_3_MASK_)));
469             m_nextIndex_ --;
470             m_nextBlockIndex_ =  DATA_BLOCK_LENGTH_;
471             return true;
472         }
473         return false;
474     }
475 
476     // private data members --------------------------------------------
477 
478     /**
479     * Size of the stage 1 BMP indexes
480     */
481     private static final int BMP_INDEX_LENGTH_ =
482                                         0x10000 >> Trie.INDEX_STAGE_1_SHIFT_;
483     /**
484     * Lead surrogate minimum value
485     */
486     private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800;
487     /**
488     * Trail surrogate minimum value
489     */
490     private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;
491     /*
492     * Trail surrogate maximum value
493     */
494     //private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;
495     /**
496     * Number of trail surrogate
497     */
498     private static final int TRAIL_SURROGATE_COUNT_ = 0x400;
499     /**
500     * Number of stage 1 indexes for supplementary calculations that maps to
501     * each lead surrogate character.
502     * See second pass into getRawOffset for the trail surrogate character.
503     * 10 for significant number of bits for trail surrogates, 5 for what we
504     * discard during shifting.
505     */
506     private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ =
507                                     1 << (10 - Trie.INDEX_STAGE_1_SHIFT_);
508     /**
509     * Number of data values in a stage 2 (data array) block.
510     */
511     private static final int DATA_BLOCK_LENGTH_ =
512                                               1 << Trie.INDEX_STAGE_1_SHIFT_;
513 //    /**
514 //    * Number of codepoints in a stage 2 block
515 //    */
516 //    private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ =
517 //                                                     DATA_BLOCK_LENGTH_ << 10;
518     /**
519     * Trie instance
520     */
521     private Trie m_trie_;
522     /**
523     * Initial value for trie values
524     */
525     private int m_initialValue_;
526     /**
527     * Next element results and data.
528     */
529     private int m_currentCodepoint_;
530     private int m_nextCodepoint_;
531     private int m_nextValue_;
532     private int m_nextIndex_;
533     private int m_nextBlock_;
534     private int m_nextBlockIndex_;
535     private int m_nextTrailIndexOffset_;
536 }
537