• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 ******************************************************************************
6 * Copyright (C) 1996-2010, International Business Machines Corporation and   *
7 * others. All Rights Reserved.                                               *
8 ******************************************************************************
9 */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.DataOutputStream;
14 import java.io.IOException;
15 import java.io.OutputStream;
16 import java.util.Arrays;
17 
18 import ohos.global.icu.lang.UCharacter;
19 import ohos.global.icu.text.UTF16;
20 
21 /**
22  * Builder class to manipulate and generate a trie.
23  * This is useful for ICU data in primitive types.
24  * Provides a compact way to store information that is indexed by Unicode
25  * values, such as character properties, types, keyboard values, etc. This is
26  * very useful when you have a block of Unicode data that contains significant
27  * values while the rest of the Unicode data is unused in the application or
28  * when you have a lot of redundance, such as where all 21,000 Han ideographs
29  * have the same value.  However, lookup is much faster than a hash table.
30  * A trie of any primitive data type serves two purposes:
31  * <UL type = round>
32  *     <LI>Fast access of the indexed values.
33  *     <LI>Smaller memory footprint.
34  * </UL>
35  * This is a direct port from the ICU4C version
36  * @author             Syn Wee Quek
37  * @hide exposed on OHOS
38  */
39 public class IntTrieBuilder extends TrieBuilder
40 {
41     // public constructor ----------------------------------------------
42 
43     /**
44      * Copy constructor
45      */
IntTrieBuilder(IntTrieBuilder table)46     public IntTrieBuilder(IntTrieBuilder table)
47     {
48         super(table);
49         m_data_ = new int[m_dataCapacity_];
50         System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
51         m_initialValue_ = table.m_initialValue_;
52         m_leadUnitValue_ = table.m_leadUnitValue_;
53     }
54 
55     /**
56      * Constructs a build table
57      * @param aliasdata data to be filled into table
58      * @param maxdatalength maximum data length allowed in table
59      * @param initialvalue inital data value
60      * @param latin1linear is latin 1 to be linear
61      */
IntTrieBuilder(int aliasdata[], int maxdatalength, int initialvalue, int leadunitvalue, boolean latin1linear)62     public IntTrieBuilder(int aliasdata[], int maxdatalength,
63                           int initialvalue, int leadunitvalue,
64                           boolean latin1linear)
65     {
66         super();
67         if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
68                                                   && maxdatalength < 1024)) {
69             throw new IllegalArgumentException(
70                                                "Argument maxdatalength is too small");
71         }
72 
73         if (aliasdata != null) {
74             m_data_ = aliasdata;
75         }
76         else {
77             m_data_ = new int[maxdatalength];
78         }
79 
80         // preallocate and reset the first data block (block index 0)
81         int j = DATA_BLOCK_LENGTH;
82 
83         if (latin1linear) {
84             // preallocate and reset the first block (number 0) and Latin-1
85             // (U+0000..U+00ff) after that made sure above that
86             // maxDataLength >= 1024
87             // set indexes to point to consecutive data blocks
88             int i = 0;
89             do {
90                 // do this at least for trie->index[0] even if that block is
91                 // only partly used for Latin-1
92                 m_index_[i ++] = j;
93                 j += DATA_BLOCK_LENGTH;
94             } while (i < (256 >> SHIFT_));
95         }
96 
97         m_dataLength_ = j;
98         // reset the initially allocated blocks to the initial value
99         Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
100         m_initialValue_ = initialvalue;
101         m_leadUnitValue_ = leadunitvalue;
102         m_dataCapacity_ = maxdatalength;
103         m_isLatin1Linear_ = latin1linear;
104         m_isCompacted_ = false;
105     }
106 
107     // public methods -------------------------------------------------------
108 
109     /*public final void print()
110       {
111       int i = 0;
112       int oldvalue = m_index_[i];
113       int count = 0;
114       System.out.println("index length " + m_indexLength_
115       + " --------------------------");
116       while (i < m_indexLength_) {
117       if (m_index_[i] != oldvalue) {
118       System.out.println("index has " + count + " counts of "
119       + Integer.toHexString(oldvalue));
120       count = 0;
121       oldvalue = m_index_[i];
122       }
123       count ++;
124       i ++;
125       }
126       System.out.println("index has " + count + " counts of "
127       + Integer.toHexString(oldvalue));
128       i = 0;
129       oldvalue = m_data_[i];
130       count = 0;
131       System.out.println("data length " + m_dataLength_
132       + " --------------------------");
133       while (i < m_dataLength_) {
134       if (m_data_[i] != oldvalue) {
135       if ((oldvalue & 0xf1000000) == 0xf1000000) {
136       int temp = oldvalue & 0xffffff;
137       temp += 0x320;
138       oldvalue = 0xf1000000 | temp;
139       }
140       if ((oldvalue & 0xf2000000) == 0xf2000000) {
141       int temp = oldvalue & 0xffffff;
142       temp += 0x14a;
143       oldvalue = 0xf2000000 | temp;
144       }
145       System.out.println("data has " + count + " counts of "
146       + Integer.toHexString(oldvalue));
147       count = 0;
148       oldvalue = m_data_[i];
149       }
150       count ++;
151       i ++;
152       }
153       if ((oldvalue & 0xf1000000) == 0xf1000000) {
154       int temp = oldvalue & 0xffffff;
155       temp += 0x320;
156       oldvalue = 0xf1000000 | temp;
157       }
158       if ((oldvalue & 0xf2000000) == 0xf2000000) {
159       int temp = oldvalue & 0xffffff;
160       temp += 0x14a;
161       oldvalue = 0xf2000000 | temp;
162       }
163       System.out.println("data has " + count + " counts of "
164       + Integer.toHexString(oldvalue));
165       }
166     */
167     /**
168      * Gets a 32 bit data from the table data
169      * @param ch codepoint which data is to be retrieved
170      * @return the 32 bit data
171      */
getValue(int ch)172     public int getValue(int ch)
173     {
174         // valid, uncompacted trie and valid c?
175         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
176             return 0;
177         }
178 
179         int block = m_index_[ch >> SHIFT_];
180         return m_data_[Math.abs(block) + (ch & MASK_)];
181     }
182 
183     /**
184      * Get a 32 bit data from the table data
185      * @param ch  code point for which data is to be retrieved.
186      * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
187      *                      char maps into block zero, otherwise false.
188      * @return the 32 bit data value.
189      */
getValue(int ch, boolean [] inBlockZero)190     public int getValue(int ch, boolean [] inBlockZero)
191     {
192         // valid, uncompacted trie and valid c?
193         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
194             if (inBlockZero != null) {
195                 inBlockZero[0] = true;
196             }
197             return 0;
198         }
199 
200         int block = m_index_[ch >> SHIFT_];
201         if (inBlockZero != null) {
202             inBlockZero[0] = (block == 0);
203         }
204         return m_data_[Math.abs(block) + (ch & MASK_)];
205     }
206 
207 
208     /**
209      * Sets a 32 bit data in the table data
210      * @param ch codepoint which data is to be set
211      * @param value to set
212      * @return true if the set is successful, otherwise
213      *              if the table has been compacted return false
214      */
setValue(int ch, int value)215     public boolean setValue(int ch, int value)
216     {
217         // valid, uncompacted trie and valid c?
218         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
219             return false;
220         }
221 
222         int block = getDataBlock(ch);
223         if (block < 0) {
224             return false;
225         }
226 
227         m_data_[block + (ch & MASK_)] = value;
228         return true;
229     }
230 
231     /**
232      * Serializes the build table with 32 bit data
233      * @param datamanipulate builder raw fold method implementation
234      * @param triedatamanipulate result trie fold method
235      * @return a new trie
236      */
serialize(TrieBuilder.DataManipulate datamanipulate, Trie.DataManipulate triedatamanipulate)237     public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
238                              Trie.DataManipulate triedatamanipulate)
239     {
240         if (datamanipulate == null) {
241             throw new IllegalArgumentException("Parameters can not be null");
242         }
243         // fold and compact if necessary, also checks that indexLength is
244         // within limits
245         if (!m_isCompacted_) {
246             // compact once without overlap to improve folding
247             compact(false);
248             // fold the supplementary part of the index array
249             fold(datamanipulate);
250             // compact again with overlap for minimum data array length
251             compact(true);
252             m_isCompacted_ = true;
253         }
254         // is dataLength within limits?
255         if (m_dataLength_ >= MAX_DATA_LENGTH_) {
256             throw new ArrayIndexOutOfBoundsException("Data length too small");
257         }
258 
259         char index[] = new char[m_indexLength_];
260         int data[] = new int[m_dataLength_];
261         // write the index (stage 1) array and the 32-bit data (stage 2) array
262         // write 16-bit index values shifted right by INDEX_SHIFT_
263         for (int i = 0; i < m_indexLength_; i ++) {
264             index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
265         }
266         // write 32-bit data values
267         System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
268 
269         int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
270         options |= OPTIONS_DATA_IS_32_BIT_;
271         if (m_isLatin1Linear_) {
272             options |= OPTIONS_LATIN1_IS_LINEAR_;
273         }
274         return new IntTrie(index, data, m_initialValue_, options,
275                            triedatamanipulate);
276     }
277 
278 
279     /**
280      * Serializes the build table to an output stream.
281      *
282      * Compacts the build-time trie after all values are set, and then
283      * writes the serialized form onto an output stream.
284      *
285      * After this, this build-time Trie can only be serialized again and/or closed;
286      * no further values can be added.
287      *
288      * This function is the rough equivalent of utrie_seriaize() in ICU4C.
289      *
290      * @param os the output stream to which the seriaized trie will be written.
291      *         If nul, the function still returns the size of the serialized Trie.
292      * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
293      *         serialized form can then be used to create a CharTrie.
294      * @param datamanipulate builder raw fold method implementation
295      * @return the number of bytes written to the output stream.
296      */
serialize(OutputStream os, boolean reduceTo16Bits, TrieBuilder.DataManipulate datamanipulate)297      public int serialize(OutputStream os, boolean reduceTo16Bits,
298             TrieBuilder.DataManipulate datamanipulate)  throws IOException {
299          if (datamanipulate == null) {
300              throw new IllegalArgumentException("Parameters can not be null");
301          }
302 
303          // fold and compact if necessary, also checks that indexLength is
304          // within limits
305          if (!m_isCompacted_) {
306              // compact once without overlap to improve folding
307              compact(false);
308              // fold the supplementary part of the index array
309              fold(datamanipulate);
310              // compact again with overlap for minimum data array length
311              compact(true);
312              m_isCompacted_ = true;
313          }
314 
315          // is dataLength within limits?
316          int length;
317          if (reduceTo16Bits) {
318              length = m_dataLength_ + m_indexLength_;
319          } else {
320              length = m_dataLength_;
321          }
322          if (length >= MAX_DATA_LENGTH_) {
323              throw new ArrayIndexOutOfBoundsException("Data length too small");
324          }
325 
326          //  struct UTrieHeader {
327          //      int32_t   signature;
328          //      int32_t   options  (a bit field)
329          //      int32_t   indexLength
330          //      int32_t   dataLength
331          length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
332          if(reduceTo16Bits) {
333              length+=2*m_dataLength_;
334          } else {
335              length+=4*m_dataLength_;
336          }
337 
338          if (os == null) {
339              // No output stream.  Just return the length of the serialized Trie, in bytes.
340              return length;
341          }
342 
343          DataOutputStream dos = new DataOutputStream(os);
344          dos.writeInt(Trie.HEADER_SIGNATURE_);
345 
346          int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
347          if(!reduceTo16Bits) {
348              options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
349          }
350          if(m_isLatin1Linear_) {
351              options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
352          }
353          dos.writeInt(options);
354 
355          dos.writeInt(m_indexLength_);
356          dos.writeInt(m_dataLength_);
357 
358          /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
359          if(reduceTo16Bits) {
360              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
361              for (int i=0; i<m_indexLength_; i++) {
362                  int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
363                  dos.writeChar(v);
364              }
365 
366              /* write 16-bit data values */
367              for(int i=0; i<m_dataLength_; i++) {
368                  int v = m_data_[i] & 0x0000ffff;
369                  dos.writeChar(v);
370              }
371          } else {
372              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
373              for (int i=0; i<m_indexLength_; i++) {
374                  int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
375                  dos.writeChar(v);
376              }
377 
378              /* write 32-bit data values */
379              for(int i=0; i<m_dataLength_; i++) {
380                  dos.writeInt(m_data_[i]);
381              }
382         }
383 
384          return length;
385 
386     }
387 
388 
389     /**
390      * Set a value in a range of code points [start..limit].
391      * All code points c with start &lt;= c &lt; limit will get the value if
392      * overwrite is true or if the old value is 0.
393      * @param start the first code point to get the value
394      * @param limit one past the last code point to get the value
395      * @param value the value
396      * @param overwrite flag for whether old non-initial values are to be
397      *        overwritten
398      * @return false if a failure occurred (illegal argument or data array
399      *               overrun)
400      */
setRange(int start, int limit, int value, boolean overwrite)401     public boolean setRange(int start, int limit, int value,
402                             boolean overwrite)
403     {
404         // repeat value in [start..limit[
405         // mark index values for repeat-data blocks by setting bit 31 of the
406         // index values fill around existing values if any, if(overwrite)
407 
408         // valid, uncompacted trie and valid indexes?
409         if (m_isCompacted_ || start < UCharacter.MIN_VALUE
410             || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
411             || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
412             return false;
413         }
414 
415         if (start == limit) {
416             return true; // nothing to do
417         }
418 
419         if ((start & MASK_) != 0) {
420             // set partial block at [start..following block boundary[
421             int block = getDataBlock(start);
422             if (block < 0) {
423                 return false;
424             }
425 
426             int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
427             if (nextStart <= limit) {
428                 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
429                           value, overwrite);
430                 start = nextStart;
431             }
432             else {
433                 fillBlock(block, start & MASK_, limit & MASK_,
434                           value, overwrite);
435                 return true;
436             }
437         }
438 
439         // number of positions in the last, partial block
440         int rest = limit & MASK_;
441 
442         // round down limit to a block boundary
443         limit &= ~MASK_;
444 
445         // iterate over all-value blocks
446         int repeatBlock = 0;
447         if (value == m_initialValue_) {
448             // repeatBlock = 0; assigned above
449         }
450         else {
451             repeatBlock = -1;
452         }
453         while (start < limit) {
454             // get index value
455             int block = m_index_[start >> SHIFT_];
456             if (block > 0) {
457                 // already allocated, fill in value
458                 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
459             }
460             else if (m_data_[-block] != value && (block == 0 || overwrite)) {
461                 // set the repeatBlock instead of the current block 0 or range
462                 // block
463                 if (repeatBlock >= 0) {
464                     m_index_[start >> SHIFT_] = -repeatBlock;
465                 }
466                 else {
467                     // create and set and fill the repeatBlock
468                     repeatBlock = getDataBlock(start);
469                     if (repeatBlock < 0) {
470                         return false;
471                     }
472 
473                     // set the negative block number to indicate that it is a
474                     // repeat block
475                     m_index_[start >> SHIFT_] = -repeatBlock;
476                     fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
477                 }
478             }
479 
480             start += DATA_BLOCK_LENGTH;
481         }
482 
483         if (rest > 0) {
484             // set partial block at [last block boundary..limit[
485             int block = getDataBlock(start);
486             if (block < 0) {
487                 return false;
488             }
489             fillBlock(block, 0, rest, value, overwrite);
490         }
491 
492         return true;
493     }
494 
495     // protected data member ------------------------------------------------
496 
497     protected int m_data_[];
498     protected int m_initialValue_;
499 
500     //  private data member ------------------------------------------------
501 
502     private int m_leadUnitValue_;
503 
504     // private methods ------------------------------------------------------
505 
allocDataBlock()506     private int allocDataBlock()
507     {
508         int newBlock = m_dataLength_;
509         int newTop = newBlock + DATA_BLOCK_LENGTH;
510         if (newTop > m_dataCapacity_) {
511             // out of memory in the data array
512             return -1;
513         }
514         m_dataLength_ = newTop;
515         return newBlock;
516     }
517 
518     /**
519      * No error checking for illegal arguments.
520      * @param ch codepoint to look for
521      * @return -1 if no new data block available (out of memory in data array)
522      */
getDataBlock(int ch)523     private int getDataBlock(int ch)
524     {
525         ch >>= SHIFT_;
526         int indexValue = m_index_[ch];
527         if (indexValue > 0) {
528             return indexValue;
529         }
530 
531         // allocate a new data block
532         int newBlock = allocDataBlock();
533         if (newBlock < 0) {
534             // out of memory in the data array
535             return -1;
536         }
537         m_index_[ch] = newBlock;
538 
539         // copy-on-write for a block from a setRange()
540         System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
541                          DATA_BLOCK_LENGTH << 2);
542         return newBlock;
543     }
544 
545     /**
546      * Compact a folded build-time trie.
547      * The compaction
548      * - removes blocks that are identical with earlier ones
549      * - overlaps adjacent blocks as much as possible (if overlap == true)
550      * - moves blocks in steps of the data granularity
551      * - moves and overlaps blocks that overlap with multiple values in the overlap region
552      *
553      * It does not
554      * - try to move and overlap blocks that are not already adjacent
555      * @param overlap flag
556      */
compact(boolean overlap)557     private void compact(boolean overlap)
558     {
559         if (m_isCompacted_) {
560             return; // nothing left to do
561         }
562 
563         // compaction
564         // initialize the index map with "block is used/unused" flags
565         findUnusedBlocks();
566 
567         // if Latin-1 is preallocated and linear, then do not compact Latin-1
568         // data
569         int overlapStart = DATA_BLOCK_LENGTH;
570         if (m_isLatin1Linear_ && SHIFT_ <= 8) {
571             overlapStart += 256;
572         }
573 
574         int newStart = DATA_BLOCK_LENGTH;
575         int i;
576         for (int start = newStart; start < m_dataLength_;) {
577             // start: index of first entry of current block
578             // newStart: index where the current block is to be moved
579             //           (right after current end of already-compacted data)
580             // skip blocks that are not used
581             if (m_map_[start >>> SHIFT_] < 0) {
582                 // advance start to the next block
583                 start += DATA_BLOCK_LENGTH;
584                 // leave newStart with the previous block!
585                 continue;
586             }
587             // search for an identical block
588             if (start >= overlapStart) {
589                 i = findSameDataBlock(m_data_, newStart, start,
590                                           overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
591                 if (i >= 0) {
592                     // found an identical block, set the other block's index
593                     // value for the current block
594                     m_map_[start >>> SHIFT_] = i;
595                     // advance start to the next block
596                     start += DATA_BLOCK_LENGTH;
597                     // leave newStart with the previous block!
598                     continue;
599                 }
600             }
601             // see if the beginning of this block can be overlapped with the
602             // end of the previous block
603             if(overlap && start>=overlapStart) {
604                 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
605                 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
606                     i>0 && !equal_int(m_data_, newStart-i, start, i);
607                     i-=DATA_GRANULARITY_) {}
608             } else {
609                 i=0;
610             }
611             if (i > 0) {
612                 // some overlap
613                 m_map_[start >>> SHIFT_] = newStart - i;
614                 // move the non-overlapping indexes to their new positions
615                 start += i;
616                 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
617                     m_data_[newStart ++] = m_data_[start ++];
618                 }
619             }
620             else if (newStart < start) {
621                 // no overlap, just move the indexes to their new positions
622                 m_map_[start >>> SHIFT_] = newStart;
623                 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
624                     m_data_[newStart ++] = m_data_[start ++];
625                 }
626             }
627             else { // no overlap && newStart==start
628                 m_map_[start >>> SHIFT_] = start;
629                 newStart += DATA_BLOCK_LENGTH;
630                 start = newStart;
631             }
632         }
633         // now adjust the index (stage 1) table
634         for (i = 0; i < m_indexLength_; ++ i) {
635             m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
636         }
637         m_dataLength_ = newStart;
638     }
639 
640     /**
641      * Find the same data block
642      * @param data array
643      * @param dataLength
644      * @param otherBlock
645      * @param step
646      */
findSameDataBlock(int data[], int dataLength, int otherBlock, int step)647     private static final int findSameDataBlock(int data[], int dataLength,
648                                                int otherBlock, int step)
649     {
650         // ensure that we do not even partially get past dataLength
651         dataLength -= DATA_BLOCK_LENGTH;
652 
653         for (int block = 0; block <= dataLength; block += step) {
654             if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
655                 return block;
656             }
657         }
658         return -1;
659     }
660 
661     /**
662      * Fold the normalization data for supplementary code points into
663      * a compact area on top of the BMP-part of the trie index,
664      * with the lead surrogates indexing this compact area.
665      *
666      * Duplicate the index values for lead surrogates:
667      * From inside the BMP area, where some may be overridden with folded values,
668      * to just after the BMP area, where they can be retrieved for
669      * code point lookups.
670      * @param manipulate fold implementation
671      */
fold(DataManipulate manipulate)672     private final void fold(DataManipulate manipulate)
673     {
674         int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
675         int index[] = m_index_;
676         // copy the lead surrogate indexes into a temporary array
677         System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
678                          SURROGATE_BLOCK_COUNT_);
679 
680         // set all values for lead surrogate code *units* to leadUnitValue
681         // so that by default runtime lookups will find no data for associated
682         // supplementary code points, unless there is data for such code points
683         // which will result in a non-zero folding value below that is set for
684         // the respective lead units
685         // the above saved the indexes for surrogate code *points*
686         // fill the indexes with simplified code from utrie_setRange32()
687         int block = 0;
688         if (m_leadUnitValue_ == m_initialValue_) {
689             // leadUnitValue == initialValue, use all-initial-value block
690             // block = 0; if block here left empty
691         }
692         else {
693             // create and fill the repeatBlock
694             block = allocDataBlock();
695             if (block < 0) {
696                 // data table overflow
697                 throw new IllegalStateException("Internal error: Out of memory space");
698             }
699             fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
700             // negative block number to indicate that it is a repeat block
701             block = -block;
702         }
703         for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
704             m_index_[c] = block;
705         }
706 
707         // Fold significant index values into the area just after the BMP
708         // indexes.
709         // In case the first lead surrogate has significant data,
710         // its index block must be used first (in which case the folding is a
711         // no-op).
712         // Later all folded index blocks are moved up one to insert the copied
713         // lead surrogate indexes.
714         int indexLength = BMP_INDEX_LENGTH_;
715         // search for any index (stage 1) entries for supplementary code points
716         for (int c = 0x10000; c < 0x110000;) {
717             if (index[c >> SHIFT_] != 0) {
718                 // there is data, treat the full block for a lead surrogate
719                 c &= ~0x3ff;
720                 // is there an identical index block?
721                 block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
722 
723                 // get a folded value for [c..c+0x400[ and,
724                 // if different from the value for the lead surrogate code
725                 // point, set it for the lead surrogate code unit
726 
727                 int value = manipulate.getFoldedValue(c,
728                                                       block + SURROGATE_BLOCK_COUNT_);
729                 if (value != getValue(UTF16.getLeadSurrogate(c))) {
730                     if (!setValue(UTF16.getLeadSurrogate(c), value)) {
731                         // data table overflow
732                         throw new ArrayIndexOutOfBoundsException(
733                                                                  "Data table overflow");
734                     }
735                     // if we did not find an identical index block...
736                     if (block == indexLength) {
737                         // move the actual index (stage 1) entries from the
738                         // supplementary position to the new one
739                         System.arraycopy(index, c >> SHIFT_, index, indexLength,
740                                          SURROGATE_BLOCK_COUNT_);
741                         indexLength += SURROGATE_BLOCK_COUNT_;
742                     }
743                 }
744                 c += 0x400;
745             }
746             else {
747                 c += DATA_BLOCK_LENGTH;
748             }
749         }
750 
751         // index array overflow?
752         // This is to guarantee that a folding offset is of the form
753         // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
754         // If the index is too large, then n>=1024 and more than 10 bits are
755         // necessary.
756         // In fact, it can only ever become n==1024 with completely unfoldable
757         // data and the additional block of duplicated values for lead
758         // surrogates.
759         if (indexLength >= MAX_INDEX_LENGTH_) {
760             throw new ArrayIndexOutOfBoundsException("Index table overflow");
761         }
762         // make space for the lead surrogate index block and insert it between
763         // the BMP indexes and the folded ones
764         System.arraycopy(index, BMP_INDEX_LENGTH_, index,
765                          BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
766                          indexLength - BMP_INDEX_LENGTH_);
767         System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
768                          SURROGATE_BLOCK_COUNT_);
769         indexLength += SURROGATE_BLOCK_COUNT_;
770         m_indexLength_ = indexLength;
771     }
772 
773     /**
774      * @hide draft / provisional / internal are hidden on OHOS
775      */
fillBlock(int block, int start, int limit, int value, boolean overwrite)776     private void fillBlock(int block, int start, int limit, int value,
777                            boolean overwrite)
778     {
779         limit += block;
780         block += start;
781         if (overwrite) {
782             while (block < limit) {
783                 m_data_[block ++] = value;
784             }
785         }
786         else {
787             while (block < limit) {
788                 if (m_data_[block] == m_initialValue_) {
789                     m_data_[block] = value;
790                 }
791                 ++ block;
792             }
793         }
794     }
795 }
796 
797