• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2012-2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * collationbasedatabuilder.cpp
9 *
10 * created on: 2012aug11
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_COLLATION
17 
18 #include "unicode/localpointer.h"
19 #include "unicode/ucharstriebuilder.h"
20 #include "unicode/uniset.h"
21 #include "unicode/unistr.h"
22 #include "unicode/utf16.h"
23 #include "collation.h"
24 #include "collationbasedatabuilder.h"
25 #include "collationdata.h"
26 #include "collationdatabuilder.h"
27 #include "collationrootelements.h"
28 #include "normalizer2impl.h"
29 #include "uassert.h"
30 #include "utrie2.h"
31 #include "uvectr32.h"
32 #include "uvectr64.h"
33 #include "uvector.h"
34 
35 U_NAMESPACE_BEGIN
36 
37 namespace {
38 
39 /**
40  * Compare two signed int64_t values as if they were unsigned.
41  */
42 int32_t
compareInt64AsUnsigned(int64_t a,int64_t b)43 compareInt64AsUnsigned(int64_t a, int64_t b) {
44     if((uint64_t)a < (uint64_t)b) {
45         return -1;
46     } else if((uint64_t)a > (uint64_t)b) {
47         return 1;
48     } else {
49         return 0;
50     }
51 }
52 
53 // TODO: Try to merge this with the binarySearch in alphaindex.cpp.
54 /**
55  * Like Java Collections.binarySearch(List, String, Comparator).
56  *
57  * @return the index>=0 where the item was found,
58  *         or the index<0 for inserting the string at ~index in sorted order
59  */
60 int32_t
binarySearch(const UVector64 & list,int64_t ce)61 binarySearch(const UVector64 &list, int64_t ce) {
62     if (list.size() == 0) { return ~0; }
63     int32_t start = 0;
64     int32_t limit = list.size();
65     for (;;) {
66         int32_t i = (start + limit) / 2;
67         int32_t cmp = compareInt64AsUnsigned(ce, list.elementAti(i));
68         if (cmp == 0) {
69             return i;
70         } else if (cmp < 0) {
71             if (i == start) {
72                 return ~start;  // insert ce before i
73             }
74             limit = i;
75         } else {
76             if (i == start) {
77                 return ~(start + 1);  // insert ce after i
78             }
79             start = i;
80         }
81     }
82 }
83 
84 }  // namespace
85 
CollationBaseDataBuilder(UErrorCode & errorCode)86 CollationBaseDataBuilder::CollationBaseDataBuilder(UErrorCode &errorCode)
87         : CollationDataBuilder(errorCode),
88           numericPrimary(0x12000000),
89           firstHanPrimary(0), lastHanPrimary(0), hanStep(2),
90           rootElements(errorCode),
91           scriptStartsLength(1) {
92     uprv_memset(scriptsIndex, 0, sizeof(scriptsIndex));
93     uprv_memset(scriptStarts, 0, sizeof(scriptStarts));
94 }
95 
~CollationBaseDataBuilder()96 CollationBaseDataBuilder::~CollationBaseDataBuilder() {
97 }
98 
99 void
init(UErrorCode & errorCode)100 CollationBaseDataBuilder::init(UErrorCode &errorCode) {
101     if(U_FAILURE(errorCode)) { return; }
102     if(trie != NULL) {
103         errorCode = U_INVALID_STATE_ERROR;
104         return;
105     }
106 
107     // Not compressible:
108     // - digits
109     // - Latin
110     // - Hani
111     // - trail weights
112     // Some scripts are compressible, some are not.
113     uprv_memset(compressibleBytes, FALSE, 256);
114     compressibleBytes[Collation::UNASSIGNED_IMPLICIT_BYTE] = TRUE;
115 
116     // For a base, the default is to compute an unassigned-character implicit CE.
117     // This includes surrogate code points; see the last option in
118     // UCA section 7.1.1 Handling Ill-Formed Code Unit Sequences.
119     trie = utrie2_open(Collation::UNASSIGNED_CE32, Collation::FFFD_CE32, &errorCode);
120 
121     // Preallocate trie blocks for Latin in the hope that proximity helps with CPU caches.
122     for(UChar32 c = 0; c < 0x180; ++c) {
123         utrie2_set32(trie, c, Collation::UNASSIGNED_CE32, &errorCode);
124     }
125 
126     utrie2_set32(trie, 0xfffe, Collation::MERGE_SEPARATOR_CE32, &errorCode);
127     // No root element for the merge separator which has 02 weights.
128     // Some code assumes that the root first primary CE is the "space first primary"
129     // from FractionalUCA.txt.
130 
131     uint32_t hangulCE32 = Collation::makeCE32FromTagAndIndex(Collation::HANGUL_TAG, 0);
132     utrie2_setRange32(trie, Hangul::HANGUL_BASE, Hangul::HANGUL_END, hangulCE32, TRUE, &errorCode);
133 
134     // Add a mapping for the first-unassigned boundary,
135     // which is the AlphabeticIndex overflow boundary.
136     UnicodeString s((UChar)0xfdd1);  // Script boundary contractions start with U+FDD1.
137     s.append((UChar)0xfdd0);  // Zzzz script sample character U+FDD0.
138     int64_t ce = Collation::makeCE(Collation::FIRST_UNASSIGNED_PRIMARY);
139     add(UnicodeString(), s, &ce, 1, errorCode);
140 
141     // Add a tailoring boundary, but not a mapping, for [first trailing].
142     ce = Collation::makeCE(Collation::FIRST_TRAILING_PRIMARY);
143     rootElements.addElement(ce, errorCode);
144 
145     // U+FFFD maps to a CE with the third-highest primary weight,
146     // for predictable handling of ill-formed UTF-8.
147     uint32_t ce32 = Collation::FFFD_CE32;
148     utrie2_set32(trie, 0xfffd, ce32, &errorCode);
149     addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode);
150 
151     // U+FFFF maps to a CE with the highest primary weight.
152     ce32 = Collation::MAX_REGULAR_CE32;
153     utrie2_set32(trie, 0xffff, ce32, &errorCode);
154     addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode);
155 }
156 
157 void
initHanRanges(const UChar32 ranges[],int32_t length,UErrorCode & errorCode)158 CollationBaseDataBuilder::initHanRanges(const UChar32 ranges[], int32_t length,
159                                         UErrorCode &errorCode) {
160     if(U_FAILURE(errorCode) || length == 0) { return; }
161     if((length & 1) != 0) {  // incomplete start/end pairs
162         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
163         return;
164     }
165     if(isAssigned(0x4e00)) {  // already set
166         errorCode = U_INVALID_STATE_ERROR;
167         return;
168     }
169     int32_t numHanCodePoints = 0;
170     for(int32_t i = 0; i < length; i += 2) {
171         UChar32 start = ranges[i];
172         UChar32 end = ranges[i + 1];
173         numHanCodePoints += end - start + 1;
174     }
175     // Multiply the number of code points by (gap+1).
176     // Add hanStep+2 for tailoring after the last Han character.
177     int32_t gap = 1;
178     hanStep = gap + 1;
179     int32_t numHan = numHanCodePoints * hanStep + hanStep + 2;
180     // Numbers of Han primaries per lead byte determined by
181     // numbers of 2nd (not compressible) times 3rd primary byte values.
182     int32_t numHanPerLeadByte = 254 * 254;
183     int32_t numHanLeadBytes = (numHan + numHanPerLeadByte - 1) / numHanPerLeadByte;
184     uint32_t hanPrimary = (uint32_t)(Collation::UNASSIGNED_IMPLICIT_BYTE - numHanLeadBytes) << 24;
185     hanPrimary |= 0x20200;
186     firstHanPrimary = hanPrimary;
187     for(int32_t i = 0; i < length; i += 2) {
188         UChar32 start = ranges[i];
189         UChar32 end = ranges[i + 1];
190         hanPrimary = setPrimaryRangeAndReturnNext(start, end, hanPrimary, hanStep, errorCode);
191     }
192     // One past the actual last one, but that is harmless for tailoring.
193     // It saves us from subtracting "hanStep" and handling underflows.
194     lastHanPrimary = hanPrimary;
195 }
196 
197 UBool
isCompressibleLeadByte(uint32_t b) const198 CollationBaseDataBuilder::isCompressibleLeadByte(uint32_t b) const {
199     return compressibleBytes[b];
200 }
201 
202 void
setCompressibleLeadByte(uint32_t b)203 CollationBaseDataBuilder::setCompressibleLeadByte(uint32_t b) {
204     compressibleBytes[b] = TRUE;
205 }
206 
207 int32_t
diffTwoBytePrimaries(uint32_t p1,uint32_t p2,UBool isCompressible)208 CollationBaseDataBuilder::diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible) {
209     if((p1 & 0xff000000) == (p2 & 0xff000000)) {
210         // Same lead bytes.
211         return (int32_t)(p2 - p1) >> 16;
212     } else {
213         int32_t linear1;
214         int32_t linear2;
215         int32_t factor;
216         if(isCompressible) {
217             // Second byte for compressible lead byte: 251 bytes 04..FE
218             linear1 = (int32_t)((p1 >> 16) & 0xff) - 4;
219             linear2 = (int32_t)((p2 >> 16) & 0xff) - 4;
220             factor = 251;
221         } else {
222             // Second byte for incompressible lead byte: 254 bytes 02..FF
223             linear1 = (int32_t)((p1 >> 16) & 0xff) - 2;
224             linear2 = (int32_t)((p2 >> 16) & 0xff) - 2;
225             factor = 254;
226         }
227         linear1 += factor * (int32_t)((p1 >> 24) & 0xff);
228         linear2 += factor * (int32_t)((p2 >> 24) & 0xff);
229         return linear2 - linear1;
230     }
231 }
232 
233 int32_t
diffThreeBytePrimaries(uint32_t p1,uint32_t p2,UBool isCompressible)234 CollationBaseDataBuilder::diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible) {
235     if((p1 & 0xffff0000) == (p2 & 0xffff0000)) {
236         // Same first two bytes.
237         return (int32_t)(p2 - p1) >> 8;
238     } else {
239         // Third byte: 254 bytes 02..FF
240         int32_t linear1 = (int32_t)((p1 >> 8) & 0xff) - 2;
241         int32_t linear2 = (int32_t)((p2 >> 8) & 0xff) - 2;
242         int32_t factor;
243         if(isCompressible) {
244             // Second byte for compressible lead byte: 251 bytes 04..FE
245             linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 4);
246             linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 4);
247             factor = 251 * 254;
248         } else {
249             // Second byte for incompressible lead byte: 254 bytes 02..FF
250             linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 2);
251             linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 2);
252             factor = 254 * 254;
253         }
254         linear1 += factor * (int32_t)((p1 >> 24) & 0xff);
255         linear2 += factor * (int32_t)((p2 >> 24) & 0xff);
256         return linear2 - linear1;
257     }
258 }
259 
260 uint32_t
encodeCEs(const int64_t ces[],int32_t cesLength,UErrorCode & errorCode)261 CollationBaseDataBuilder::encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode) {
262     addRootElements(ces, cesLength, errorCode);
263     return CollationDataBuilder::encodeCEs(ces, cesLength, errorCode);
264 }
265 
266 void
addRootElements(const int64_t ces[],int32_t cesLength,UErrorCode & errorCode)267 CollationBaseDataBuilder::addRootElements(const int64_t ces[], int32_t cesLength,
268                                           UErrorCode &errorCode) {
269     if(U_FAILURE(errorCode)) { return; }
270     for(int32_t i = 0; i < cesLength; ++i) {
271         addRootElement(ces[i], errorCode);
272     }
273 }
274 
275 void
addRootElement(int64_t ce,UErrorCode & errorCode)276 CollationBaseDataBuilder::addRootElement(int64_t ce, UErrorCode &errorCode) {
277     if(U_FAILURE(errorCode) || ce == 0) { return; }
278     // Remove case bits.
279     ce &= INT64_C(0xffffffffffff3fff);
280     U_ASSERT((ce & 0xc0) == 0);  // quaternary==0
281     // Ignore the CE if it has a Han primary weight and common secondary/tertiary weights.
282     // We will add it later, as part of the Han ranges.
283     uint32_t p = (uint32_t)(ce >> 32);
284     uint32_t secTer = (uint32_t)ce;
285     if(firstHanPrimary <= p && p <= lastHanPrimary) {
286         if(secTer < Collation::COMMON_SEC_AND_TER_CE) {
287             // buildRootElementsTable() does not currently handle this case.
288             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
289             return;
290         }
291         if(secTer == Collation::COMMON_SEC_AND_TER_CE) {
292             return;
293         }
294     }
295     if(secTer != Collation::COMMON_SEC_AND_TER_CE) {  // minor optimization
296         // Check that secondary and tertiary weights are > 01.
297         uint32_t s = secTer >> 16;
298         uint32_t t = secTer & Collation::ONLY_TERTIARY_MASK;
299         if((s != 0 && s <= Collation::BEFORE_WEIGHT16) ||
300                 (t != 0 && t <= Collation::BEFORE_WEIGHT16)) {
301             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
302             return;
303         }
304     }
305     // Check that primaries have at most 3 bytes.
306     if((p & 0xff) != 0) {
307         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
308         return;
309     }
310     int32_t i = binarySearch(rootElements, ce);
311     if(i < 0) {
312         rootElements.insertElementAt(ce, ~i, errorCode);
313     }
314 }
315 
316 void
addScriptStart(int32_t script,uint32_t p)317 CollationBaseDataBuilder::addScriptStart(int32_t script, uint32_t p) {
318     // The primary weight must be the lowest possible for a two-byte prefix.
319     // It could be 2, 3, or 4 bytes long. We round down to the two-byte boundary.
320     U_ASSERT((p & 0xff) == 0 || (p & 0xff) == 2);
321     p >>= 8;
322     U_ASSERT((p & 0xff) == 0 || (p & 0xff) == 2);
323     p >>= 8;
324     uint32_t lowestP2 = compressibleBytes[p >> 8] ? 4 : 2;
325     if((p & 0xff) == lowestP2) {
326         // The script really starts on a lead byte boundary. Round down to that.
327         p &= 0xff00;
328     }
329     // Script starts should be added in ascending order, otherwise we would need to sort them.
330     if(script < UCOL_REORDER_CODE_FIRST) {
331         U_ASSERT(0 <= script && script < USCRIPT_CODE_LIMIT);
332     } else {
333         U_ASSERT(script <= (UCOL_REORDER_CODE_FIRST + 15));
334         script = USCRIPT_CODE_LIMIT + script - UCOL_REORDER_CODE_FIRST;
335     }
336     if(scriptStartsLength != 0 && scriptStarts[scriptStartsLength - 1] == p) {
337         // Two scripts share a range (e.g., Hira & Kana).
338         scriptsIndex[script] = (uint16_t)(scriptStartsLength - 1);
339     } else {
340         U_ASSERT(scriptStartsLength == 0 || scriptStarts[scriptStartsLength - 1] <= p);
341         U_ASSERT(scriptStartsLength < UPRV_LENGTHOF(scriptStarts));
342         scriptsIndex[script] = (uint16_t)scriptStartsLength;
343         scriptStarts[scriptStartsLength++] = (uint16_t)p;
344     }
345     if(script == USCRIPT_UNKNOWN) {
346         // The last script start is for unassigned code points
347         // (with high implict primary weights).
348         // Add one more entry with the limit of this range,
349         // which is the start of the trailing-weights range.
350         U_ASSERT(scriptStartsLength < UPRV_LENGTHOF(scriptStarts));
351         scriptStarts[scriptStartsLength++] =
352                 (uint16_t)((Collation::FIRST_TRAILING_PRIMARY >> 16) & 0xff00);
353     }
354 }
355 
356 void
build(CollationData & data,UErrorCode & errorCode)357 CollationBaseDataBuilder::build(CollationData &data, UErrorCode &errorCode) {
358     buildMappings(data, errorCode);
359     data.numericPrimary = numericPrimary;
360     data.compressibleBytes = compressibleBytes;
361 
362     int32_t numScripts = USCRIPT_CODE_LIMIT;
363     while(numScripts > 0 && scriptsIndex[numScripts - 1] == 0) { --numScripts; }
364     // Move the 16 special groups (not all used)
365     // down for contiguous storage of the script and special-group indexes.
366     for(int32_t i = 0; i < 16; ++i) {
367         scriptsIndex[numScripts + i] = scriptsIndex[USCRIPT_CODE_LIMIT + i];
368     }
369     data.numScripts = numScripts;
370     data.scriptsIndex = scriptsIndex;
371     data.scriptStarts = scriptStarts;
372     data.scriptStartsLength = scriptStartsLength;
373     buildFastLatinTable(data, errorCode);
374 }
375 
376 void
buildRootElementsTable(UVector32 & table,UErrorCode & errorCode)377 CollationBaseDataBuilder::buildRootElementsTable(UVector32 &table, UErrorCode &errorCode) {
378     // Limit sentinel for root elements.
379     // This allows us to reduce range checks at runtime.
380     rootElements.addElement(Collation::makeCE(CollationRootElements::PRIMARY_SENTINEL), errorCode);
381     if(U_FAILURE(errorCode)) { return; }
382     uint32_t nextHanPrimary = firstHanPrimary;  // Set to 0xffffffff after the last Han range.
383     uint32_t prevPrimary = 0;  // Start with primary ignorable CEs.
384     UBool needCommonSecTerUnit = FALSE;
385     UBool hasDeltaUnit = FALSE;
386     for(int32_t i = 0; i < rootElements.size(); ++i) {
387         int64_t ce = rootElements.elementAti(i);
388         uint32_t p = (uint32_t)(ce >> 32);
389         uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK;
390         if((p != prevPrimary || secTer > Collation::COMMON_SEC_AND_TER_CE) && needCommonSecTerUnit) {
391             // The last primary had low sec/ter weights but no common sec/ter combination.
392             // The next unit is either a new primary or an above-common sec/ter unit.
393             // Insert a common sec/ter unit so that the builder will reliably
394             // tailor to either before or after a common weight but not across it.
395             table.addElement((int32_t)Collation::COMMON_SEC_AND_TER_CE |
396                             CollationRootElements::SEC_TER_DELTA_FLAG, errorCode);
397         }
398         if(p != prevPrimary) {
399             U_ASSERT((p & 0xff) == 0);
400             int32_t end;
401             if(p >= nextHanPrimary) {
402                 // Add a Han primary weight or range.
403                 // We omitted them initially, and omitted all CEs with Han primaries
404                 // and common secondary/tertiary weights.
405                 U_ASSERT(p > lastHanPrimary || secTer > Collation::COMMON_SEC_AND_TER_CE);
406                 if(p == nextHanPrimary) {
407                     // One single Han primary with non-common secondary/tertiary weights.
408                     table.addElement((int32_t)p, errorCode);
409                     if(p < lastHanPrimary) {
410                         // Prepare for the next Han range.
411                         nextHanPrimary = Collation::incThreeBytePrimaryByOffset(p, FALSE, hanStep);
412                     } else {
413                         // p is the last Han primary.
414                         nextHanPrimary = 0xffffffff;
415                     }
416                 } else {
417                     // p > nextHanPrimary: Add a Han primary range, starting with nextHanPrimary.
418                     table.addElement((int32_t)nextHanPrimary, errorCode);
419                     if(nextHanPrimary == lastHanPrimary) {
420                         // nextHanPrimary == lastHanPrimary < p
421                         // We just wrote the single last Han primary.
422                         nextHanPrimary = 0xffffffff;
423                         table.addElement((int32_t)p, errorCode);
424                     } else if(p < lastHanPrimary) {
425                         // nextHanPrimary < p < lastHanPrimary
426                         // End the Han range on p, prepare for the next range.
427                         table.addElement((int32_t)p | hanStep, errorCode);
428                         nextHanPrimary = Collation::incThreeBytePrimaryByOffset(p, FALSE, hanStep);
429                     } else if(p == lastHanPrimary) {
430                         // nextHanPrimary < p == lastHanPrimary
431                         // End the last Han range on p.
432                         table.addElement((int32_t)p | hanStep, errorCode);
433                         nextHanPrimary = 0xffffffff;
434                     } else {
435                         // nextHanPrimary < lastHanPrimary < p
436                         // End the last Han range, then write p.
437                         table.addElement((int32_t)lastHanPrimary | hanStep, errorCode);
438                         nextHanPrimary = 0xffffffff;
439                         table.addElement((int32_t)p, errorCode);
440                     }
441                 }
442             } else if(prevPrimary != 0 &&
443                     // If there has not been an intervening delta unit,
444                     // then we will try to combine the previous primary and
445                     // the next several primaries into a range.
446                     !hasDeltaUnit &&
447                     // Might get a range with more than two primaries if the current CE
448                     // has common sec/ter weights.
449                     secTer == Collation::COMMON_SEC_AND_TER_CE &&
450                     (end = writeRootElementsRange(prevPrimary, p, i + 1, table, errorCode)) != 0) {
451                 // Multiple CEs with only common secondary/tertiary weights were
452                 // combined into a primary range.
453                 // The range end was written, ending with the primary of rootElements[end].
454                 ce = rootElements.elementAti(end);
455                 p = (uint32_t)(ce >> 32);
456                 secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK;
457                 i = end;
458             } else {
459                 // Write the primary weight of a normal CE.
460                 table.addElement((int32_t)p, errorCode);
461             }
462             prevPrimary = p;
463             needCommonSecTerUnit = FALSE;
464             hasDeltaUnit = FALSE;
465         }
466         if(secTer == Collation::COMMON_SEC_AND_TER_CE && !needCommonSecTerUnit) {
467             // The common secondar/tertiary weights are implied in the primary unit.
468         } else {
469             if(secTer < Collation::COMMON_SEC_AND_TER_CE) {
470                 // Remember to not suppress a common sec/ter unit if p!=0.
471                 needCommonSecTerUnit = p != 0;
472             } else if(secTer == Collation::COMMON_SEC_AND_TER_CE) {
473                 // Real common sec/ter unit, no need to insert an artificial one.
474                 needCommonSecTerUnit = FALSE;
475             }
476             // For each new set of secondary/tertiary weights we write a delta unit.
477             table.addElement((int32_t)secTer | CollationRootElements::SEC_TER_DELTA_FLAG, errorCode);
478             hasDeltaUnit = TRUE;
479         }
480     }
481 }
482 
483 int32_t
writeRootElementsRange(uint32_t prevPrimary,uint32_t p,int32_t i,UVector32 & table,UErrorCode & errorCode)484 CollationBaseDataBuilder::writeRootElementsRange(
485         uint32_t prevPrimary, uint32_t p, int32_t i,
486         UVector32 &table, UErrorCode &errorCode) {
487     if(U_FAILURE(errorCode) || i >= rootElements.size()) { return 0; }
488     U_ASSERT(prevPrimary < p);
489     // No ranges of single-byte primaries.
490     if((p & prevPrimary & 0xff0000) == 0) { return 0; }
491     // Lead bytes of compressible primaries must match.
492     UBool isCompressible = isCompressiblePrimary(p);
493     if((isCompressible || isCompressiblePrimary(prevPrimary)) &&
494             (p & 0xff000000) != (prevPrimary & 0xff000000)) {
495         return 0;
496     }
497     // Number of bytes in the primaries.
498     UBool twoBytes;
499     // Number of primaries from prevPrimary to p.
500     int32_t step;
501     if((p & 0xff00) == 0) {
502         // 2-byte primary
503         if((prevPrimary & 0xff00) != 0) { return 0; }  // length mismatch
504         twoBytes = TRUE;
505         step = diffTwoBytePrimaries(prevPrimary, p, isCompressible);
506     } else {
507         // 3-byte primary
508         if((prevPrimary & 0xff00) == 0) { return 0; }  // length mismatch
509         twoBytes = FALSE;
510         step = diffThreeBytePrimaries(prevPrimary, p, isCompressible);
511     }
512     if(step > (int32_t)CollationRootElements::PRIMARY_STEP_MASK) { return 0; }
513     // See if there are more than two CEs with primaries increasing by "step"
514     // and with only common secondary/tertiary weights on all but the last one.
515     int32_t end = 0;  // Initially 0: No range for just two primaries.
516     for(;;) {
517         prevPrimary = p;
518         // Calculate which primary we expect next.
519         uint32_t nextPrimary;  // = p + step
520         if(twoBytes) {
521             nextPrimary = Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
522         } else {
523             nextPrimary = Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
524         }
525         // Fetch the actual next CE.
526         int64_t ce = rootElements.elementAti(i);
527         p = (uint32_t)(ce >> 32);
528         uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK;
529         // Does this primary increase by "step" from the last one?
530         if(p != nextPrimary ||
531                 // Do not cross into a new lead byte if either is compressible.
532                 ((p & 0xff000000) != (prevPrimary & 0xff000000) &&
533                     (isCompressible || isCompressiblePrimary(p)))) {
534             // The range ends with the previous CE.
535             p = prevPrimary;
536             break;
537         }
538         // Extend the range to include this primary.
539         end = i++;
540         // This primary is the last in the range if it has non-common weights
541         // or if we are at the end of the list.
542         if(secTer != Collation::COMMON_SEC_AND_TER_CE || i >= rootElements.size()) { break; }
543     }
544     if(end != 0) {
545         table.addElement((int32_t)p | step, errorCode);
546     }
547     return end;
548 }
549 
550 U_NAMESPACE_END
551 
552 #endif  // !UCONFIG_NO_COLLATION
553