• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 *
7 *   Copyright (C) 1999-2015, International Business Machines
8 *   Corporation and others.  All Rights Reserved.
9 *
10 *******************************************************************************
11 *   CollationWeights.java, ported from collationweights.h/.cpp
12 *
13 *   C++ version created on: 2001mar08 as ucol_wgt.h
14 *   created by: Markus W. Scherer
15 */
16 
17 package ohos.global.icu.impl.coll;
18 
19 import java.util.Arrays;
20 
21 /**
22  * Allocates n collation element weights between two exclusive limits.
23  * Used only internally by the collation tailoring builder.
24  * @hide exposed on OHOS
25  */
26 public final class CollationWeights {
CollationWeights()27     public CollationWeights() {}
28 
initForPrimary(boolean compressible)29     public void initForPrimary(boolean compressible) {
30         middleLength=1;
31         minBytes[1] = Collation.MERGE_SEPARATOR_BYTE + 1;
32         maxBytes[1] = Collation.TRAIL_WEIGHT_BYTE;
33         if(compressible) {
34             minBytes[2] = Collation.PRIMARY_COMPRESSION_LOW_BYTE + 1;
35             maxBytes[2] = Collation.PRIMARY_COMPRESSION_HIGH_BYTE - 1;
36         } else {
37             minBytes[2] = 2;
38             maxBytes[2] = 0xff;
39         }
40         minBytes[3] = 2;
41         maxBytes[3] = 0xff;
42         minBytes[4] = 2;
43         maxBytes[4] = 0xff;
44     }
45 
initForSecondary()46     public void initForSecondary() {
47         // We use only the lower 16 bits for secondary weights.
48         middleLength=3;
49         minBytes[1] = 0;
50         maxBytes[1] = 0;
51         minBytes[2] = 0;
52         maxBytes[2] = 0;
53         minBytes[3] = Collation.LEVEL_SEPARATOR_BYTE + 1;
54         maxBytes[3] = 0xff;
55         minBytes[4] = 2;
56         maxBytes[4] = 0xff;
57     }
58 
initForTertiary()59     public void initForTertiary() {
60         // We use only the lower 16 bits for tertiary weights.
61         middleLength=3;
62         minBytes[1] = 0;
63         maxBytes[1] = 0;
64         minBytes[2] = 0;
65         maxBytes[2] = 0;
66         // We use only 6 bits per byte.
67         // The other bits are used for case & quaternary weights.
68         minBytes[3] = Collation.LEVEL_SEPARATOR_BYTE + 1;
69         maxBytes[3] = 0x3f;
70         minBytes[4] = 2;
71         maxBytes[4] = 0x3f;
72     }
73 
74     /**
75      * Determine heuristically
76      * what ranges to use for a given number of weights between (excluding)
77      * two limits.
78      *
79      * @param lowerLimit A collation element weight; the ranges will be filled to cover
80      *                   weights greater than this one.
81      * @param upperLimit A collation element weight; the ranges will be filled to cover
82      *                   weights less than this one.
83      * @param n          The number of collation element weights w necessary such that
84      *                   lowerLimit<w<upperLimit in lexical order.
85      * @return true if it is possible to fit n elements between the limits
86      */
allocWeights(long lowerLimit, long upperLimit, int n)87     public boolean allocWeights(long lowerLimit, long upperLimit, int n) {
88         // Call getWeightRanges() and then determine heuristically
89         // which ranges to use for a given number of weights between (excluding)
90         // two limits.
91         // puts("");
92 
93         if(!getWeightRanges(lowerLimit, upperLimit)) {
94             // printf("error: unable to get Weight ranges\n");
95             return false;
96         }
97 
98         /* try until we find suitably large ranges */
99         for(;;) {
100             /* get the smallest number of bytes in a range */
101             int minLength=ranges[0].length;
102 
103             if(allocWeightsInShortRanges(n, minLength)) { break; }
104 
105             if(minLength == 4) {
106                 // printf("error: the maximum number of %ld weights is insufficient for n=%ld\n",
107                 //       minLengthCount, n);
108                 return false;
109             }
110 
111             if(allocWeightsInMinLengthRanges(n, minLength)) { break; }
112 
113             /* no good match, lengthen all minLength ranges and iterate */
114             // printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1);
115             for (int i = 0; i < rangeCount && ranges[i].length == minLength; ++i) {
116                 lengthenRange(ranges[i]);
117             }
118         }
119 
120         /* puts("final ranges:");
121         for(int i=0; i<rangeCount; ++i) {
122             printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n",
123                   i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count);
124         } */
125 
126         rangeIndex = 0;
127         if(rangeCount < ranges.length) {
128             ranges[rangeCount] = null;  // force a crash when going out of bounds
129         }
130         return true;
131     }
132 
133     /**
134      * Given a set of ranges calculated by allocWeights(),
135      * iterate through the weights.
136      * The ranges are modified to keep the current iteration state.
137      *
138      * @return The next weight in the ranges, or 0xffffffff if there is none left.
139      */
nextWeight()140     public long nextWeight() {
141         if(rangeIndex >= rangeCount) {
142             return 0xffffffffL;
143         } else {
144             /* get the next weight */
145             WeightRange range = ranges[rangeIndex];
146             long weight = range.start;
147             if(--range.count == 0) {
148                 /* this range is finished */
149                 ++rangeIndex;
150             } else {
151                 /* increment the weight for the next value */
152                 range.start = incWeight(weight, range.length);
153                 assert(range.start <= range.end);
154             }
155 
156             return weight;
157         }
158     }
159 
160     /** @hide draft / provisional / internal are hidden on OHOS*/
161     private static final class WeightRange implements Comparable<WeightRange> {
162         long start, end;
163         int length, count;
164 
165         @Override
compareTo(WeightRange other)166         public int compareTo(WeightRange other) {
167             long l=start;
168             long r=other.start;
169             if(l<r) {
170                 return -1;
171             } else if(l>r) {
172                 return 1;
173             } else {
174                 return 0;
175             }
176         }
177     }
178 
179     /* helper functions for CE weights */
180 
lengthOfWeight(long weight)181     public static int lengthOfWeight(long weight) {
182         if((weight&0xffffff)==0) {
183             return 1;
184         } else if((weight&0xffff)==0) {
185             return 2;
186         } else if((weight&0xff)==0) {
187             return 3;
188         } else {
189             return 4;
190         }
191     }
192 
getWeightTrail(long weight, int length)193     private static int getWeightTrail(long weight, int length) {
194         return (int)(weight>>(8*(4-length)))&0xff;
195     }
196 
setWeightTrail(long weight, int length, int trail)197     private static long setWeightTrail(long weight, int length, int trail) {
198         length=8*(4-length);
199         return (weight&(0xffffff00L<<length))|((long)trail<<length);
200     }
201 
getWeightByte(long weight, int idx)202     private static int getWeightByte(long weight, int idx) {
203         return getWeightTrail(weight, idx); /* same calculation */
204     }
205 
setWeightByte(long weight, int idx, int b)206     private static long setWeightByte(long weight, int idx, int b) {
207         long mask; /* 0xffffffff except a 00 "hole" for the index-th byte */
208 
209         idx*=8;
210         if(idx<32) {
211             mask=0xffffffffL>>idx;
212         } else {
213             // Do not use int>>32 because on some platforms that does not shift at all
214             // while we need it to become 0.
215             // PowerPC: 0xffffffff>>32 = 0           (wanted)
216             // x86:     0xffffffff>>32 = 0xffffffff  (not wanted)
217             //
218             // ANSI C99 6.5.7 Bitwise shift operators:
219             // "If the value of the right operand is negative
220             // or is greater than or equal to the width of the promoted left operand,
221             // the behavior is undefined."
222             mask=0;
223         }
224         idx=32-idx;
225         mask|=0xffffff00L<<idx;
226         return (weight&mask)|((long)b<<idx);
227     }
228 
truncateWeight(long weight, int length)229     private static long truncateWeight(long weight, int length) {
230         return weight&(0xffffffffL<<(8*(4-length)));
231     }
232 
incWeightTrail(long weight, int length)233     private static long incWeightTrail(long weight, int length) {
234         return weight+(1L<<(8*(4-length)));
235     }
236 
decWeightTrail(long weight, int length)237     private static long decWeightTrail(long weight, int length) {
238         return weight-(1L<<(8*(4-length)));
239     }
240 
241     /** @return number of usable byte values for byte idx */
countBytes(int idx)242     private int countBytes(int idx) {
243         return maxBytes[idx] - minBytes[idx] + 1;
244     }
245 
incWeight(long weight, int length)246     private long incWeight(long weight, int length) {
247         for(;;) {
248             int b=getWeightByte(weight, length);
249             if(b<maxBytes[length]) {
250                 return setWeightByte(weight, length, b+1);
251             } else {
252                 // Roll over, set this byte to the minimum and increment the previous one.
253                 weight=setWeightByte(weight, length, minBytes[length]);
254                 --length;
255                 assert(length > 0);
256             }
257         }
258     }
259 
incWeightByOffset(long weight, int length, int offset)260     private long incWeightByOffset(long weight, int length, int offset) {
261         for(;;) {
262             offset += getWeightByte(weight, length);
263             if(offset <= maxBytes[length]) {
264                 return setWeightByte(weight, length, offset);
265             } else {
266                 // Split the offset between this byte and the previous one.
267                 offset -= minBytes[length];
268                 weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length));
269                 offset /= countBytes(length);
270                 --length;
271                 assert(length > 0);
272             }
273         }
274     }
275 
lengthenRange(WeightRange range)276     private void lengthenRange(WeightRange range) {
277         int length=range.length+1;
278         range.start=setWeightTrail(range.start, length, minBytes[length]);
279         range.end=setWeightTrail(range.end, length, maxBytes[length]);
280         range.count*=countBytes(length);
281         range.length=length;
282     }
283 
284     /**
285      * Takes two CE weights and calculates the
286      * possible ranges of weights between the two limits, excluding them.
287      * For weights with up to 4 bytes there are up to 2*4-1=7 ranges.
288      */
getWeightRanges(long lowerLimit, long upperLimit)289     private boolean getWeightRanges(long lowerLimit, long upperLimit) {
290         assert(lowerLimit != 0);
291         assert(upperLimit != 0);
292 
293         /* get the lengths of the limits */
294         int lowerLength=lengthOfWeight(lowerLimit);
295         int upperLength=lengthOfWeight(upperLimit);
296 
297         // printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength);
298         // printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength);
299         assert(lowerLength>=middleLength);
300         // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000.
301 
302         if(lowerLimit>=upperLimit) {
303             // printf("error: no space between lower & upper limits\n");
304             return false;
305         }
306 
307         /* check that neither is a prefix of the other */
308         if(lowerLength<upperLength) {
309             if(lowerLimit==truncateWeight(upperLimit, lowerLength)) {
310                 // printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit);
311                 return false;
312             }
313         }
314         /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */
315 
316         WeightRange[] lower = new WeightRange[5]; /* [0] and [1] are not used - this simplifies indexing */
317         WeightRange middle = new WeightRange();
318         WeightRange[] upper = new WeightRange[5];
319 
320         /*
321          * With the limit lengths of 1..4, there are up to 7 ranges for allocation:
322          * range     minimum length
323          * lower[4]  4
324          * lower[3]  3
325          * lower[2]  2
326          * middle    1
327          * upper[2]  2
328          * upper[3]  3
329          * upper[4]  4
330          *
331          * We are now going to calculate up to 7 ranges.
332          * Some of them will typically overlap, so we will then have to merge and eliminate ranges.
333          */
334         long weight=lowerLimit;
335         for(int length=lowerLength; length>middleLength; --length) {
336             int trail=getWeightTrail(weight, length);
337             if(trail<maxBytes[length]) {
338                 lower[length] = new WeightRange();
339                 lower[length].start=incWeightTrail(weight, length);
340                 lower[length].end=setWeightTrail(weight, length, maxBytes[length]);
341                 lower[length].length=length;
342                 lower[length].count=maxBytes[length]-trail;
343             }
344             weight=truncateWeight(weight, length-1);
345         }
346         if(weight<0xff000000L) {
347             middle.start=incWeightTrail(weight, middleLength);
348         } else {
349             // Prevent overflow for primary lead byte FF
350             // which would yield a middle range starting at 0.
351             middle.start=0xffffffffL;  // no middle range
352         }
353 
354         weight=upperLimit;
355         for(int length=upperLength; length>middleLength; --length) {
356             int trail=getWeightTrail(weight, length);
357             if(trail>minBytes[length]) {
358                 upper[length] = new WeightRange();
359                 upper[length].start=setWeightTrail(weight, length, minBytes[length]);
360                 upper[length].end=decWeightTrail(weight, length);
361                 upper[length].length=length;
362                 upper[length].count=trail-minBytes[length];
363             }
364             weight=truncateWeight(weight, length-1);
365         }
366         middle.end=decWeightTrail(weight, middleLength);
367 
368         /* set the middle range */
369         middle.length=middleLength;
370         if(middle.end>=middle.start) {
371             middle.count=(int)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
372         } else {
373             /* no middle range, eliminate overlaps */
374             for(int length=4; length>middleLength; --length) {
375                 if(lower[length] != null && upper[length] != null &&
376                         lower[length].count>0 && upper[length].count>0) {
377                     // Note: The lowerEnd and upperStart weights are versions of
378                     // lowerLimit and upperLimit (which are lowerLimit<upperLimit),
379                     // truncated (still less-or-equal)
380                     // and then with their last bytes changed to the
381                     // maxByte (for lowerEnd) or minByte (for upperStart).
382                     final long lowerEnd=lower[length].end;
383                     final long upperStart=upper[length].start;
384                     boolean merged=false;
385 
386                     if(lowerEnd>upperStart) {
387                         // These two lower and upper ranges collide.
388                         // Since lowerLimit<upperLimit and lowerEnd and upperStart
389                         // are versions with only their last bytes modified
390                         // (and following ones removed/reset to 0),
391                         // lowerEnd>upperStart is only possible
392                         // if the leading bytes are equal
393                         // and lastByte(lowerEnd)>lastByte(upperStart).
394                         assert(truncateWeight(lowerEnd, length-1)==
395                                 truncateWeight(upperStart, length-1));
396                         // Intersect these two ranges.
397                         lower[length].end=upper[length].end;
398                         lower[length].count=
399                                 getWeightTrail(lower[length].end, length)-
400                                 getWeightTrail(lower[length].start, length)+1;
401                         // count might be <=0 in which case there is no room,
402                         // and the range-collecting code below will ignore this range.
403                         merged=true;
404                     } else if(lowerEnd==upperStart) {
405                         // Not possible, unless minByte==maxByte which is not allowed.
406                         assert(minBytes[length]<maxBytes[length]);
407                     } else /* lowerEnd<upperStart */ {
408                         if(incWeight(lowerEnd, length)==upperStart) {
409                             // Merge adjacent ranges.
410                             lower[length].end=upper[length].end;
411                             lower[length].count+=upper[length].count;  // might be >countBytes
412                             merged=true;
413                         }
414                     }
415                     if(merged) {
416                         // Remove all shorter ranges.
417                         // There was no room available for them between the ranges we just merged.
418                         upper[length].count=0;
419                         while(--length>middleLength) {
420                             lower[length]=upper[length]=null;
421                         }
422                         break;
423                     }
424                 }
425             }
426         }
427 
428         /* print ranges
429         for(int length=4; length>=2; --length) {
430             if(lower[length].count>0) {
431                 printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count);
432             }
433         }
434         if(middle.count>0) {
435             printf("middle   .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count);
436         }
437         for(int length=2; length<=4; ++length) {
438             if(upper[length].count>0) {
439                 printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count);
440             }
441         } */
442 
443         /* copy the ranges, shortest first, into the result array */
444         rangeCount=0;
445         if(middle.count>0) {
446             ranges[0] = middle;
447             rangeCount=1;
448         }
449         for(int length=middleLength+1; length<=4; ++length) {
450             /* copy upper first so that later the middle range is more likely the first one to use */
451             if(upper[length] != null && upper[length].count>0) {
452                 ranges[rangeCount++]=upper[length];
453             }
454             if(lower[length] != null && lower[length].count>0) {
455                 ranges[rangeCount++]=lower[length];
456             }
457         }
458         return rangeCount>0;
459     }
460 
allocWeightsInShortRanges(int n, int minLength)461     private boolean allocWeightsInShortRanges(int n, int minLength) {
462         // See if the first few minLength and minLength+1 ranges have enough weights.
463         for(int i = 0; i < rangeCount && ranges[i].length <= (minLength + 1); ++i) {
464             if(n <= ranges[i].count) {
465                 // Use the first few minLength and minLength+1 ranges.
466                 if(ranges[i].length > minLength) {
467                     // Reduce the number of weights from the last minLength+1 range
468                     // which might sort before some minLength ranges,
469                     // so that we use all weights in the minLength ranges.
470                     ranges[i].count = n;
471                 }
472                 rangeCount = i + 1;
473                 // printf("take first %ld ranges\n", rangeCount);
474 
475                 if(rangeCount>1) {
476                     /* sort the ranges by weight values */
477                     Arrays.sort(ranges, 0, rangeCount);
478                 }
479                 return true;
480             }
481             n -= ranges[i].count;  // still >0
482         }
483         return false;
484     }
485 
allocWeightsInMinLengthRanges(int n, int minLength)486     private boolean allocWeightsInMinLengthRanges(int n, int minLength) {
487         // See if the minLength ranges have enough weights
488         // when we split one and lengthen the following ones.
489         int count = 0;
490         int minLengthRangeCount;
491         for(minLengthRangeCount = 0;
492                 minLengthRangeCount < rangeCount &&
493                     ranges[minLengthRangeCount].length == minLength;
494                 ++minLengthRangeCount) {
495             count += ranges[minLengthRangeCount].count;
496         }
497 
498         int nextCountBytes = countBytes(minLength + 1);
499         if(n > count * nextCountBytes) { return false; }
500 
501         // Use the minLength ranges. Merge them, and then split again as necessary.
502         long start = ranges[0].start;
503         long end = ranges[0].end;
504         for(int i = 1; i < minLengthRangeCount; ++i) {
505             if(ranges[i].start < start) { start = ranges[i].start; }
506             if(ranges[i].end > end) { end = ranges[i].end; }
507         }
508 
509         // Calculate how to split the range between minLength (count1) and minLength+1 (count2).
510         // Goal:
511         //   count1 + count2 * nextCountBytes = n
512         //   count1 + count2 = count
513         // These turn into
514         //   (count - count2) + count2 * nextCountBytes = n
515         // and then into the following count1 & count2 computations.
516         int count2 = (n - count) / (nextCountBytes - 1);  // number of weights to be lengthened
517         int count1 = count - count2;  // number of minLength weights
518         if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) {
519             // round up
520             ++count2;
521             --count1;
522             assert((count1 + count2 * nextCountBytes) >= n);
523         }
524 
525         ranges[0].start = start;
526 
527         if(count1 == 0) {
528             // Make one long range.
529             ranges[0].end = end;
530             ranges[0].count = count;
531             lengthenRange(ranges[0]);
532             rangeCount = 1;
533         } else {
534             // Split the range, lengthen the second part.
535             // printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n",
536             //       splitRange, rangeCount, count1, count2);
537 
538             // Next start = start + count1. First end = 1 before that.
539             ranges[0].end = incWeightByOffset(start, minLength, count1 - 1);
540             ranges[0].count = count1;
541 
542             if(ranges[1] == null) {
543                 ranges[1] = new WeightRange();
544             }
545             ranges[1].start = incWeight(ranges[0].end, minLength);
546             ranges[1].end = end;
547             ranges[1].length = minLength;  // +1 when lengthened
548             ranges[1].count = count2;  // *countBytes when lengthened
549             lengthenRange(ranges[1]);
550             rangeCount = 2;
551         }
552         return true;
553     }
554 
555     private int middleLength;
556     private int[] minBytes = new int[5];  // for byte 1, 2, 3, 4
557     private int[] maxBytes = new int[5];
558     private WeightRange[] ranges = new WeightRange[7];
559     private int rangeIndex;
560     private int rangeCount;
561 }
562