• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  ******************************************************************************
6  * Copyright (C) 1996-2011, International Business Machines Corporation and   *
7  * others. All Rights Reserved.                                               *
8  ******************************************************************************
9  */
10 
11 /**
12  * Store bits (Unicode character properties) in bit set vectors.
13  *
14  * This is a port of the C++ class UPropsVectors from ICU4C
15  *
16  * @author Shaopeng Jia
17  * @hide draft / provisional / internal are hidden on OHOS
18  */
19 
20 package ohos.global.icu.impl;
21 
22 import java.util.Arrays;
23 import java.util.Comparator;
24 
25 /**
26  * Unicode Properties Vectors associated with code point ranges.
27  *
28  * Rows of primitive integers in a contiguous array store the range limits and
29  * the properties vectors.
30  *
31  * In each row, row[0] contains the start code point and row[1] contains the
32  * limit code point, which is the start of the next range.
33  *
34  * Initially, there is only one range [0..0x110000] with values 0.
35  *
36  * It would be possible to store only one range boundary per row, but
37  * self-contained rows allow to later sort them by contents.
38  * @hide exposed on OHOS
39  */
40 public class PropsVectors {
41     private int v[];
42     private int columns; // number of columns, plus two for start
43     // and limit values
44     private int maxRows;
45     private int rows;
46     private int prevRow; // search optimization: remember last row seen
47     private boolean isCompacted;
48 
49     // internal function to compare elements in v and target. Return true iff
50     // elements in v starting from index1 to index1 + length - 1
51     // are exactly the same as elements in target
52     // starting from index2 to index2 + length - 1
areElementsSame(int index1, int[] target, int index2, int length)53     private boolean areElementsSame(int index1, int[] target, int index2,
54             int length) {
55         for (int i = 0; i < length; ++i) {
56             if (v[index1 + i] != target[index2 + i]) {
57                 return false;
58             }
59         }
60         return true;
61     }
62 
63     // internal function which given rangeStart, returns
64     // index where v[index]<=rangeStart<v[index+1].
65     // The returned index is a multiple of columns, and therefore
66     // points to the start of a row.
findRow(int rangeStart)67     private int findRow(int rangeStart) {
68         int index = 0;
69 
70         // check the vicinity of the last-seen row (start
71         // searching with an unrolled loop)
72 
73         index = prevRow * columns;
74         if (rangeStart >= v[index]) {
75             if (rangeStart < v[index + 1]) {
76                 // same row as last seen
77                 return index;
78             } else {
79                 index += columns;
80                 if (rangeStart < v[index + 1]) {
81                     ++prevRow;
82                     return index;
83                 } else {
84                     index += columns;
85                     if (rangeStart < v[index + 1]) {
86                         prevRow += 2;
87                         return index;
88                     } else if ((rangeStart - v[index + 1]) < 10) {
89                         // we are close, continue looping
90                         prevRow += 2;
91                         do {
92                             ++prevRow;
93                             index += columns;
94                         } while (rangeStart >= v[index + 1]);
95                         return index;
96                     }
97                 }
98             }
99         } else if (rangeStart < v[1]) {
100             // the very first row
101             prevRow = 0;
102             return 0;
103         }
104 
105         // do a binary search for the start of the range
106         int start = 0;
107         int mid = 0;
108         int limit = rows;
109         while (start < limit - 1) {
110             mid = (start + limit) / 2;
111             index = columns * mid;
112             if (rangeStart < v[index]) {
113                 limit = mid;
114             } else if (rangeStart < v[index + 1]) {
115                 prevRow = mid;
116                 return index;
117             } else {
118                 start = mid;
119             }
120         }
121 
122         // must be found because all ranges together always cover
123         // all of Unicode
124         prevRow = start;
125         index = start * columns;
126         return index;
127     }
128 
129     /*
130      * Special pseudo code points for storing the initialValue and the
131      * errorValue which are used to initialize a Trie or similar.
132      */
133     public final static int FIRST_SPECIAL_CP = 0x110000;
134     public final static int INITIAL_VALUE_CP = 0x110000;
135     public final static int ERROR_VALUE_CP = 0x110001;
136     public final static int MAX_CP = 0x110001;
137 
138     public final static int INITIAL_ROWS = 1 << 12;
139     public final static int MEDIUM_ROWS = 1 << 16;
140     public final static int MAX_ROWS = MAX_CP + 1;
141 
142     /*
143      * Constructor.
144      * @param numOfColumns Number of value integers (32-bit int) per row.
145      */
PropsVectors(int numOfColumns)146     public PropsVectors(int numOfColumns) {
147         if (numOfColumns < 1) {
148             throw new IllegalArgumentException("numOfColumns need to be no "
149                     + "less than 1; but it is " + numOfColumns);
150         }
151         columns = numOfColumns + 2; // count range start and limit columns
152         v = new int[INITIAL_ROWS * columns];
153         maxRows = INITIAL_ROWS;
154         rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
155         prevRow = 0;
156         isCompacted = false;
157         v[0] = 0;
158         v[1] = 0x110000;
159         int index = columns;
160         for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
161             v[index] = cp;
162             v[index + 1] = cp + 1;
163             index += columns;
164         }
165     }
166 
167     /*
168      * In rows for code points [start..end], select the column, reset the mask
169      * bits and set the value bits (ANDed with the mask).
170      *
171      * @throws IllegalArgumentException
172      *
173      * @throws IllegalStateException
174      *
175      * @throws IndexOutOfBoundsException
176      */
setValue(int start, int end, int column, int value, int mask)177     public void setValue(int start, int end, int column, int value, int mask) {
178         if (start < 0 || start > end || end > MAX_CP || column < 0
179                 || column >= (columns - 2)) {
180             throw new IllegalArgumentException();
181         }
182         if (isCompacted) {
183             throw new IllegalStateException("Shouldn't be called after"
184                     + "compact()!");
185         }
186 
187         int firstRow, lastRow;
188         int limit = end + 1;
189         boolean splitFirstRow, splitLastRow;
190         // skip range start and limit columns
191         column += 2;
192         value &= mask;
193 
194         // find the rows whose ranges overlap with the input range
195         // find the first and last row, always successful
196         firstRow = findRow(start);
197         lastRow = findRow(end);
198 
199         /*
200          * Rows need to be split if they partially overlap with the input range
201          * (only possible for the first and last rows) and if their value
202          * differs from the input value.
203          */
204         splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
205         splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
206 
207         // split first/last rows if necessary
208         if (splitFirstRow || splitLastRow) {
209             int rowsToExpand = 0;
210             if (splitFirstRow) {
211                 ++rowsToExpand;
212             }
213             if (splitLastRow) {
214                 ++rowsToExpand;
215             }
216             int newMaxRows = 0;
217             if ((rows + rowsToExpand) > maxRows) {
218                 if (maxRows < MEDIUM_ROWS) {
219                     newMaxRows = MEDIUM_ROWS;
220                 } else if (maxRows < MAX_ROWS) {
221                     newMaxRows = MAX_ROWS;
222                 } else {
223                     throw new IndexOutOfBoundsException(
224                             "MAX_ROWS exceeded! Increase it to a higher value" +
225                             "in the implementation");
226                 }
227                 int[] temp = new int[newMaxRows * columns];
228                 System.arraycopy(v, 0, temp, 0, rows * columns);
229                 v = temp;
230                 maxRows = newMaxRows;
231             }
232 
233             // count the number of row cells to move after the last row,
234             // and move them
235             int count = (rows * columns) - (lastRow + columns);
236             if (count > 0) {
237                 System.arraycopy(v, lastRow + columns, v, lastRow
238                         + (1 + rowsToExpand) * columns, count);
239             }
240             rows += rowsToExpand;
241 
242             // split the first row, and move the firstRow pointer
243             // to the second part
244             if (splitFirstRow) {
245                 // copy all affected rows up one and move the lastRow pointer
246                 count = lastRow - firstRow + columns;
247                 System.arraycopy(v, firstRow, v, firstRow + columns, count);
248                 lastRow += columns;
249 
250                 // split the range and move the firstRow pointer
251                 v[firstRow + 1] = v[firstRow + columns] = start;
252                 firstRow += columns;
253             }
254 
255             // split the last row
256             if (splitLastRow) {
257                 // copy the last row data
258                 System.arraycopy(v, lastRow, v, lastRow + columns, columns);
259 
260                 // split the range and move the firstRow pointer
261                 v[lastRow + 1] = v[lastRow + columns] = limit;
262             }
263         }
264 
265         // set the "row last seen" to the last row for the range
266         prevRow = lastRow / columns;
267 
268         // set the input value in all remaining rows
269         firstRow += column;
270         lastRow += column;
271         mask = ~mask;
272         for (;;) {
273             v[firstRow] = (v[firstRow] & mask) | value;
274             if (firstRow == lastRow) {
275                 break;
276             }
277             firstRow += columns;
278         }
279     }
280 
281     /*
282      * Always returns 0 if called after compact().
283      */
getValue(int c, int column)284     public int getValue(int c, int column) {
285         if (isCompacted || c < 0 || c > MAX_CP || column < 0
286                 || column >= (columns - 2)) {
287             return 0;
288         }
289         int index = findRow(c);
290         return v[index + 2 + column];
291     }
292 
293     /*
294      * Returns an array which contains value elements
295      * in row rowIndex.
296      *
297      * @throws IllegalStateException
298      * @throws IllegalArgumentException
299      */
getRow(int rowIndex)300     public int[] getRow(int rowIndex) {
301         if (isCompacted) {
302             throw new IllegalStateException(
303                     "Illegal Invocation of the method after compact()");
304         }
305         if (rowIndex < 0 || rowIndex > rows) {
306             throw new IllegalArgumentException("rowIndex out of bound!");
307         }
308         int[] rowToReturn = new int[columns - 2];
309         System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
310                          columns - 2);
311         return rowToReturn;
312     }
313 
314     /*
315      * Returns an int which is the start codepoint
316      * in row rowIndex.
317      *
318      * @throws IllegalStateException
319      *
320      * @throws IllegalArgumentException
321      */
getRowStart(int rowIndex)322     public int getRowStart(int rowIndex) {
323         if (isCompacted) {
324             throw new IllegalStateException(
325                     "Illegal Invocation of the method after compact()");
326         }
327         if (rowIndex < 0 || rowIndex > rows) {
328             throw new IllegalArgumentException("rowIndex out of bound!");
329         }
330         return v[rowIndex * columns];
331     }
332 
333     /*
334      * Returns an int which is the limit codepoint
335      * minus 1 in row rowIndex.
336      *
337      * @throws IllegalStateException
338      *
339      * @throws IllegalArgumentException
340      */
getRowEnd(int rowIndex)341     public int getRowEnd(int rowIndex) {
342         if (isCompacted) {
343             throw new IllegalStateException(
344                     "Illegal Invocation of the method after compact()");
345         }
346         if (rowIndex < 0 || rowIndex > rows) {
347             throw new IllegalArgumentException("rowIndex out of bound!");
348         }
349         return v[rowIndex * columns + 1] - 1;
350     }
351 
352     /*
353      * Compact the vectors:
354      * - modify the memory
355      * - keep only unique vectors
356      * - store them contiguously from the beginning of the memory
357      * - for each (non-unique) row, call the respective function in
358      *   CompactHandler
359      *
360      * The handler's rowIndex is the index of the row in the compacted
361      * memory block. Therefore, it starts at 0 increases in increments of the
362      * columns value.
363      *
364      * In a first phase, only special values are delivered (each exactly once).
365      * Then CompactHandler::startRealValues() is called
366      * where rowIndex is the length of the compacted array.
367      * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
368      * called for each row of real values.
369      */
compact(CompactHandler compactor)370     public void compact(CompactHandler compactor) {
371         if (isCompacted) {
372             return;
373         }
374 
375         // Set the flag now: Sorting and compacting destroys the builder
376         // data structure.
377         isCompacted = true;
378         int valueColumns = columns - 2; // not counting start & limit
379 
380         // sort the properties vectors to find unique vector values
381         Integer[] indexArray = new Integer[rows];
382         for (int i = 0; i < rows; ++i) {
383             indexArray[i] = Integer.valueOf(columns * i);
384         }
385 
386         Arrays.sort(indexArray, new Comparator<Integer>() {
387             @Override
388             public int compare(Integer o1, Integer o2) {
389                 int indexOfRow1 = o1.intValue();
390                 int indexOfRow2 = o2.intValue();
391                 int count = columns; // includes start/limit columns
392 
393                 // start comparing after start/limit
394                 // but wrap around to them
395                 int index = 2;
396                 do {
397                     if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
398                         return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
399                                 : 1;
400                     }
401                     if (++index == columns) {
402                         index = 0;
403                     }
404                 } while (--count > 0);
405 
406                 return 0;
407             }
408         });
409 
410         /*
411          * Find and set the special values. This has to do almost the same work
412          * as the compaction below, to find the indexes where the special-value
413          * rows will move.
414          */
415         int count = -valueColumns;
416         for (int i = 0; i < rows; ++i) {
417             int start = v[indexArray[i].intValue()];
418 
419             // count a new values vector if it is different
420             // from the current one
421             if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
422                     indexArray[i-1].intValue() + 2, valueColumns)) {
423                 count += valueColumns;
424             }
425 
426             if (start == INITIAL_VALUE_CP) {
427                 compactor.setRowIndexForInitialValue(count);
428             } else if (start == ERROR_VALUE_CP) {
429                 compactor.setRowIndexForErrorValue(count);
430             }
431         }
432 
433         // count is at the beginning of the last vector,
434         // add valueColumns to include that last vector
435         count += valueColumns;
436 
437         // Call the handler once more to signal the start of
438         // delivering real values.
439         compactor.startRealValues(count);
440 
441         /*
442          * Move vector contents up to a contiguous array with only unique
443          * vector values, and call the handler function for each vector.
444          *
445          * This destroys the Properties Vector structure and replaces it
446          * with an array of just vector values.
447          */
448         int[] temp = new int[count];
449         count = -valueColumns;
450         for (int i = 0; i < rows; ++i) {
451             int start = v[indexArray[i].intValue()];
452             int limit = v[indexArray[i].intValue() + 1];
453 
454             // count a new values vector if it is different
455             // from the current one
456             if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
457                     temp, count, valueColumns)) {
458                 count += valueColumns;
459                 System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
460                         valueColumns);
461             }
462 
463             if (start < FIRST_SPECIAL_CP) {
464                 compactor.setRowIndexForRange(start, limit - 1, count);
465             }
466         }
467         v = temp;
468 
469         // count is at the beginning of the last vector,
470         // add one to include that last vector
471         rows = count / valueColumns + 1;
472     }
473 
474     /*
475      * Get the vectors array after calling compact().
476      *
477      * @throws IllegalStateException
478      */
getCompactedArray()479     public int[] getCompactedArray() {
480         if (!isCompacted) {
481             throw new IllegalStateException(
482                     "Illegal Invocation of the method before compact()");
483         }
484         return v;
485     }
486 
487     /*
488      * Get the number of rows for the compacted array.
489      *
490      * @throws IllegalStateException
491      */
getCompactedRows()492     public int getCompactedRows() {
493         if (!isCompacted) {
494             throw new IllegalStateException(
495                     "Illegal Invocation of the method before compact()");
496         }
497         return rows;
498     }
499 
500     /*
501      * Get the number of columns for the compacted array.
502      *
503      * @throws IllegalStateException
504      */
getCompactedColumns()505     public int getCompactedColumns() {
506         if (!isCompacted) {
507             throw new IllegalStateException(
508                     "Illegal Invocation of the method before compact()");
509         }
510         return columns - 2;
511     }
512 
513     /*
514      * Call compact(), create a IntTrie with indexes into the compacted
515      * vectors array.
516      */
compactToTrieWithRowIndexes()517     public IntTrie compactToTrieWithRowIndexes() {
518         PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
519         compact(compactor);
520         return compactor.builder.serialize(new DefaultGetFoldedValue(
521                 compactor.builder), new DefaultGetFoldingOffset());
522     }
523 
524     // inner class implementation of Trie.DataManipulate
525     private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
526         @Override
getFoldingOffset(int value)527         public int getFoldingOffset(int value) {
528             return value;
529         }
530     }
531 
532     // inner class implementation of TrieBuilder.DataManipulate
533     private static class DefaultGetFoldedValue implements
534             TrieBuilder.DataManipulate {
535         private IntTrieBuilder builder;
536 
DefaultGetFoldedValue(IntTrieBuilder inBuilder)537         public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
538             builder = inBuilder;
539         }
540 
541         @Override
getFoldedValue(int start, int offset)542         public int getFoldedValue(int start, int offset) {
543             int initialValue = builder.m_initialValue_;
544             int limit = start + 0x400;
545             while (start < limit) {
546                 boolean[] inBlockZero = new boolean[1];
547                 int value = builder.getValue(start, inBlockZero);
548                 if (inBlockZero[0]) {
549                     start += TrieBuilder.DATA_BLOCK_LENGTH;
550                 } else if (value != initialValue) {
551                     return offset;
552                 } else {
553                     ++start;
554                 }
555             }
556             return 0;
557         }
558     }
559 
560     /**
561      * @hide exposed on OHOS
562      */
563     public static interface CompactHandler {
setRowIndexForRange(int start, int end, int rowIndex)564         public void setRowIndexForRange(int start, int end, int rowIndex);
setRowIndexForInitialValue(int rowIndex)565         public void setRowIndexForInitialValue(int rowIndex);
setRowIndexForErrorValue(int rowIndex)566         public void setRowIndexForErrorValue(int rowIndex);
startRealValues(int rowIndex)567         public void startRealValues(int rowIndex);
568     }
569 }