• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 * Copyright (C) 2013-2014, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 * CollationRootElements.java, ported from collationrootelements.h/.cpp
10 *
11 * C++ version created on: 2013mar01
12 * created by: Markus W. Scherer
13 */
14 
15 package ohos.global.icu.impl.coll;
16 
17 /**
18  * Container and access methods for collation elements and weights
19  * that occur in the root collator.
20  * Needed for finding boundaries for building a tailoring.
21  *
22  * This class takes and returns 16-bit secondary and tertiary weights.
23  * @hide exposed on OHOS
24  */
25 public final class CollationRootElements {
CollationRootElements(long[] rootElements)26     public CollationRootElements(long[] rootElements) {
27         elements = rootElements;
28     }
29 
30     /**
31      * Higher than any root primary.
32      */
33     public static final long PRIMARY_SENTINEL = 0xffffff00L;
34 
35     /**
36      * Flag in a root element, set if the element contains secondary & tertiary weights,
37      * rather than a primary.
38      */
39     public static final int SEC_TER_DELTA_FLAG = 0x80;
40     /**
41      * Mask for getting the primary range step value from a primary-range-end element.
42      */
43     public static final int PRIMARY_STEP_MASK = 0x7f;
44 
45     /**
46      * Index of the first CE with a non-zero tertiary weight.
47      * Same as the start of the compact root elements table.
48      */
49     public static final int IX_FIRST_TERTIARY_INDEX = 0;
50     /**
51      * Index of the first CE with a non-zero secondary weight.
52      */
53     static final int IX_FIRST_SECONDARY_INDEX = 1;
54     /**
55      * Index of the first CE with a non-zero primary weight.
56      */
57     static final int IX_FIRST_PRIMARY_INDEX = 2;
58     /**
59      * Must match Collation.COMMON_SEC_AND_TER_CE.
60      */
61     static final int IX_COMMON_SEC_AND_TER_CE = 3;
62     /**
63      * Secondary & tertiary boundaries.
64      * Bits 31..24: [fixed last secondary common byte 45]
65      * Bits 23..16: [fixed first ignorable secondary byte 80]
66      * Bits 15.. 8: reserved, 0
67      * Bits  7.. 0: [fixed first ignorable tertiary byte 3C]
68      */
69     static final int IX_SEC_TER_BOUNDARIES = 4;
70     /**
71      * The current number of indexes.
72      * Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
73      */
74     static final int IX_COUNT = 5;
75 
76     /**
77      * Returns the boundary between tertiary weights of primary/secondary CEs
78      * and those of tertiary CEs.
79      * This is the upper limit for tertiaries of primary/secondary CEs.
80      * This minus one is the lower limit for tertiaries of tertiary CEs.
81      */
getTertiaryBoundary()82     public int getTertiaryBoundary() {
83         return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
84     }
85 
86     /**
87      * Returns the first assigned tertiary CE.
88      */
getFirstTertiaryCE()89     long getFirstTertiaryCE() {
90         return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
91     }
92 
93     /**
94      * Returns the last assigned tertiary CE.
95      */
getLastTertiaryCE()96     long getLastTertiaryCE() {
97         return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
98     }
99 
100     /**
101      * Returns the last common secondary weight.
102      * This is the lower limit for secondaries of primary CEs.
103      */
getLastCommonSecondary()104     public int getLastCommonSecondary() {
105         return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
106     }
107 
108     /**
109      * Returns the boundary between secondary weights of primary CEs
110      * and those of secondary CEs.
111      * This is the upper limit for secondaries of primary CEs.
112      * This minus one is the lower limit for secondaries of secondary CEs.
113      */
getSecondaryBoundary()114     public int getSecondaryBoundary() {
115         return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
116     }
117 
118     /**
119      * Returns the first assigned secondary CE.
120      */
getFirstSecondaryCE()121     long getFirstSecondaryCE() {
122         return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
123     }
124 
125     /**
126      * Returns the last assigned secondary CE.
127      */
getLastSecondaryCE()128     long getLastSecondaryCE() {
129         return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
130     }
131 
132     /**
133      * Returns the first assigned primary weight.
134      */
getFirstPrimary()135     long getFirstPrimary() {
136         return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]];  // step=0: cannot be a range end
137     }
138 
139     /**
140      * Returns the first assigned primary CE.
141      */
getFirstPrimaryCE()142     long getFirstPrimaryCE() {
143         return Collation.makeCE(getFirstPrimary());
144     }
145 
146     /**
147      * Returns the last root CE with a primary weight before p.
148      * Intended only for reordering group boundaries.
149      */
lastCEWithPrimaryBefore(long p)150     long lastCEWithPrimaryBefore(long p) {
151         if(p == 0) { return 0; }
152         assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
153         int index = findP(p);
154         long q = elements[index];
155         long secTer;
156         if(p == (q & 0xffffff00L)) {
157             // p == elements[index] is a root primary. Find the CE before it.
158             // We must not be in a primary range.
159             assert((q & PRIMARY_STEP_MASK) == 0);
160             secTer = elements[index - 1];
161             if((secTer & SEC_TER_DELTA_FLAG) == 0) {
162                 // Primary CE just before p.
163                 p = secTer & 0xffffff00L;
164                 secTer = Collation.COMMON_SEC_AND_TER_CE;
165             } else {
166                 // secTer = last secondary & tertiary for the previous primary
167                 index -= 2;
168                 for(;;) {
169                     p = elements[index];
170                     if((p & SEC_TER_DELTA_FLAG) == 0) {
171                         p &= 0xffffff00L;
172                         break;
173                     }
174                     --index;
175                 }
176             }
177         } else {
178             // p > elements[index] which is the previous primary.
179             // Find the last secondary & tertiary weights for it.
180             p = q & 0xffffff00L;
181             secTer = Collation.COMMON_SEC_AND_TER_CE;
182             for(;;) {
183                 q = elements[++index];
184                 if((q & SEC_TER_DELTA_FLAG) == 0) {
185                     // We must not be in a primary range.
186                     assert((q & PRIMARY_STEP_MASK) == 0);
187                     break;
188                 }
189                 secTer = q;
190             }
191         }
192         return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
193     }
194 
195     /**
196      * Returns the first root CE with a primary weight of at least p.
197      * Intended only for reordering group boundaries.
198      */
firstCEWithPrimaryAtLeast(long p)199     long firstCEWithPrimaryAtLeast(long p) {
200         if(p == 0) { return 0; }
201         int index = findP(p);
202         if(p != (elements[index] & 0xffffff00L)) {
203             for(;;) {
204                 p = elements[++index];
205                 if((p & SEC_TER_DELTA_FLAG) == 0) {
206                     // First primary after p. We must not be in a primary range.
207                     assert((p & PRIMARY_STEP_MASK) == 0);
208                     break;
209                 }
210             }
211         }
212         // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
213         return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
214     }
215 
216     /**
217      * Returns the primary weight before p.
218      * p must be greater than the first root primary.
219      */
getPrimaryBefore(long p, boolean isCompressible)220     long getPrimaryBefore(long p, boolean isCompressible) {
221         int index = findPrimary(p);
222         int step;
223         long q = elements[index];
224         if(p == (q & 0xffffff00L)) {
225             // Found p itself. Return the previous primary.
226             // See if p is at the end of a previous range.
227             step = (int)q & PRIMARY_STEP_MASK;
228             if(step == 0) {
229                 // p is not at the end of a range. Look for the previous primary.
230                 do {
231                     p = elements[--index];
232                 } while((p & SEC_TER_DELTA_FLAG) != 0);
233                 return p & 0xffffff00L;
234             }
235         } else {
236             // p is in a range, and not at the start.
237             long nextElement = elements[index + 1];
238             assert(isEndOfPrimaryRange(nextElement));
239             step = (int)nextElement & PRIMARY_STEP_MASK;
240         }
241         // Return the previous range primary.
242         if((p & 0xffff) == 0) {
243             return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
244         } else {
245             return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
246         }
247     }
248 
249     /** Returns the secondary weight before [p, s]. */
getSecondaryBefore(long p, int s)250     int getSecondaryBefore(long p, int s) {
251         int index;
252         int previousSec, sec;
253         if(p == 0) {
254             index = (int)elements[IX_FIRST_SECONDARY_INDEX];
255             // Gap at the beginning of the secondary CE range.
256             previousSec = 0;
257             sec = (int)(elements[index] >> 16);
258         } else {
259             index = findPrimary(p) + 1;
260             previousSec = Collation.BEFORE_WEIGHT16;
261             sec = (int)getFirstSecTerForPrimary(index) >>> 16;
262         }
263         assert(s >= sec);
264         while(s > sec) {
265             previousSec = sec;
266             assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
267             sec = (int)(elements[index++] >> 16);
268         }
269         assert(sec == s);
270         return previousSec;
271     }
272 
273     /** Returns the tertiary weight before [p, s, t]. */
getTertiaryBefore(long p, int s, int t)274     int getTertiaryBefore(long p, int s, int t) {
275         assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
276         int index;
277         int previousTer;
278         long secTer;
279         if(p == 0) {
280             if(s == 0) {
281                 index = (int)elements[IX_FIRST_TERTIARY_INDEX];
282                 // Gap at the beginning of the tertiary CE range.
283                 previousTer = 0;
284             } else {
285                 index = (int)elements[IX_FIRST_SECONDARY_INDEX];
286                 previousTer = Collation.BEFORE_WEIGHT16;
287             }
288             secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
289         } else {
290             index = findPrimary(p) + 1;
291             previousTer = Collation.BEFORE_WEIGHT16;
292             secTer = getFirstSecTerForPrimary(index);
293         }
294         long st = ((long)s << 16) | t;
295         while(st > secTer) {
296             if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
297             assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
298             secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
299         }
300         assert(secTer == st);
301         return previousTer & 0xffff;
302     }
303 
304     /**
305      * Finds the index of the input primary.
306      * p must occur as a root primary, and must not be 0.
307      */
findPrimary(long p)308     int findPrimary(long p) {
309         // Requirement: p must occur as a root primary.
310         assert((p & 0xff) == 0);  // at most a 3-byte primary
311         int index = findP(p);
312         // If p is in a range, then we just assume that p is an actual primary in this range.
313         // (Too cumbersome/expensive to check.)
314         // Otherwise, it must be an exact match.
315         assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
316         return index;
317     }
318 
319     /**
320      * Returns the primary weight after p where index=findPrimary(p).
321      * p must be at least the first root primary.
322      */
getPrimaryAfter(long p, int index, boolean isCompressible)323     long getPrimaryAfter(long p, int index, boolean isCompressible) {
324         assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
325         long q = elements[++index];
326         int step;
327         if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
328             // Return the next primary in this range.
329             if((p & 0xffff) == 0) {
330                 return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
331             } else {
332                 return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
333             }
334         } else {
335             // Return the next primary in the list.
336             while((q & SEC_TER_DELTA_FLAG) != 0) {
337                 q = elements[++index];
338             }
339             assert((q & PRIMARY_STEP_MASK) == 0);
340             return q;
341         }
342     }
343     /**
344      * Returns the secondary weight after [p, s] where index=findPrimary(p)
345      * except use index=0 for p=0.
346      *
347      * <p>Must return a weight for every root [p, s] as well as for every weight
348      * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16.
349      *
350      * <p>Exception: [0, 0] is handled by the CollationBuilder:
351      * Both its lower and upper boundaries are special.
352      */
getSecondaryAfter(int index, int s)353     int getSecondaryAfter(int index, int s) {
354         long secTer;
355         int secLimit;
356         if(index == 0) {
357             // primary = 0
358             assert(s != 0);
359             index = (int)elements[IX_FIRST_SECONDARY_INDEX];
360             secTer = elements[index];
361             // Gap at the end of the secondary CE range.
362             secLimit = 0x10000;
363         } else {
364             assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
365             secTer = getFirstSecTerForPrimary(index + 1);
366             // If this is an explicit sec/ter unit, then it will be read once more.
367             // Gap for secondaries of primary CEs.
368             secLimit = getSecondaryBoundary();
369         }
370         for(;;) {
371             int sec = (int)(secTer >> 16);
372             if(sec > s) { return sec; }
373             secTer = elements[++index];
374             if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
375         }
376     }
377     /**
378      * Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
379      * except use index=0 for p=0.
380      *
381      * <p>Must return a weight for every root [p, s, t] as well as for every weight
382      * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16.
383      *
384      * <p>Exception: [0, 0, 0] is handled by the CollationBuilder:
385      * Both its lower and upper boundaries are special.
386      */
getTertiaryAfter(int index, int s, int t)387     int getTertiaryAfter(int index, int s, int t) {
388         long secTer;
389         int terLimit;
390         if(index == 0) {
391             // primary = 0
392             if(s == 0) {
393                 assert(t != 0);
394                 index = (int)elements[IX_FIRST_TERTIARY_INDEX];
395                 // Gap at the end of the tertiary CE range.
396                 terLimit = 0x4000;
397             } else {
398                 index = (int)elements[IX_FIRST_SECONDARY_INDEX];
399                 // Gap for tertiaries of primary/secondary CEs.
400                 terLimit = getTertiaryBoundary();
401             }
402             secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
403         } else {
404             assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
405             secTer = getFirstSecTerForPrimary(index + 1);
406             // If this is an explicit sec/ter unit, then it will be read once more.
407             terLimit = getTertiaryBoundary();
408         }
409         long st = (((long)s & 0xffffffffL) << 16) | t;
410         for(;;) {
411             if(secTer > st) {
412                 assert((secTer >> 16) == s);
413                 return (int)secTer & 0xffff;
414             }
415             secTer = elements[++index];
416             // No tertiary greater than t for this primary+secondary.
417             if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
418             secTer &= ~SEC_TER_DELTA_FLAG;
419         }
420     }
421 
422     /**
423      * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.
424      */
getFirstSecTerForPrimary(int index)425     private long getFirstSecTerForPrimary(int index) {
426         long secTer = elements[index];
427         if((secTer & SEC_TER_DELTA_FLAG) == 0) {
428             // No sec/ter delta.
429             return Collation.COMMON_SEC_AND_TER_CE;
430         }
431         secTer &= ~SEC_TER_DELTA_FLAG;
432         if(secTer > Collation.COMMON_SEC_AND_TER_CE) {
433             // Implied sec/ter.
434             return Collation.COMMON_SEC_AND_TER_CE;
435         }
436         // Explicit sec/ter below common/common.
437         return secTer;
438     }
439 
440     /**
441      * Finds the largest index i where elements[i]<=p.
442      * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
443      * Does not require that p is a root collator primary.
444      */
findP(long p)445     private int findP(long p) {
446         // p need not occur as a root primary.
447         // For example, it might be a reordering group boundary.
448         assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
449         // modified binary search
450         int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
451         assert(p >= elements[start]);
452         int limit = elements.length - 1;
453         assert(elements[limit] >= PRIMARY_SENTINEL);
454         assert(p < elements[limit]);
455         while((start + 1) < limit) {
456             // Invariant: elements[start] and elements[limit] are primaries,
457             // and elements[start]<=p<=elements[limit].
458             int i = (int)(((long)start + (long)limit) / 2);
459             long q = elements[i];
460             if((q & SEC_TER_DELTA_FLAG) != 0) {
461                 // Find the next primary.
462                 int j = i + 1;
463                 for(;;) {
464                     if(j == limit) { break; }
465                     q = elements[j];
466                     if((q & SEC_TER_DELTA_FLAG) == 0) {
467                         i = j;
468                         break;
469                     }
470                     ++j;
471                 }
472                 if((q & SEC_TER_DELTA_FLAG) != 0) {
473                     // Find the preceding primary.
474                     j = i - 1;
475                     for(;;) {
476                         if(j == start) { break; }
477                         q = elements[j];
478                         if((q & SEC_TER_DELTA_FLAG) == 0) {
479                             i = j;
480                             break;
481                         }
482                         --j;
483                     }
484                     if((q & SEC_TER_DELTA_FLAG) != 0) {
485                         // No primary between start and limit.
486                         break;
487                     }
488                 }
489             }
490             if(p < (q & 0xffffff00L)) {  // Reset the "step" bits of a range end primary.
491                 limit = i;
492             } else {
493                 start = i;
494             }
495         }
496         return start;
497     }
498 
499     private static boolean isEndOfPrimaryRange(long q) {
500         return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
501     }
502 
503     /**
504      * Data structure: See ICU4C source/i18n/collationrootelements.h.
505      */
506     private long[] elements;
507 }
508