• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * collationkeys.cpp
7 *
8 * created on: 2012sep02
9 * created by: Markus W. Scherer
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION
15 
16 #include "unicode/bytestream.h"
17 #include "collation.h"
18 #include "collationiterator.h"
19 #include "collationkeys.h"
20 #include "collationsettings.h"
21 #include "uassert.h"
22 
23 U_NAMESPACE_BEGIN
24 
~SortKeyByteSink()25 SortKeyByteSink::~SortKeyByteSink() {}
26 
27 void
Append(const char * bytes,int32_t n)28 SortKeyByteSink::Append(const char *bytes, int32_t n) {
29     if (n <= 0 || bytes == NULL) {
30         return;
31     }
32     if (ignore_ > 0) {
33         int32_t ignoreRest = ignore_ - n;
34         if (ignoreRest >= 0) {
35             ignore_ = ignoreRest;
36             return;
37         } else {
38             bytes += ignore_;
39             n = -ignoreRest;
40             ignore_ = 0;
41         }
42     }
43     int32_t length = appended_;
44     appended_ += n;
45     if ((buffer_ + length) == bytes) {
46         return;  // the caller used GetAppendBuffer() and wrote the bytes already
47     }
48     int32_t available = capacity_ - length;
49     if (n <= available) {
50         uprv_memcpy(buffer_ + length, bytes, n);
51     } else {
52         AppendBeyondCapacity(bytes, n, length);
53     }
54 }
55 
56 char *
GetAppendBuffer(int32_t min_capacity,int32_t desired_capacity_hint,char * scratch,int32_t scratch_capacity,int32_t * result_capacity)57 SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
58                                  int32_t desired_capacity_hint,
59                                  char *scratch,
60                                  int32_t scratch_capacity,
61                                  int32_t *result_capacity) {
62     if (min_capacity < 1 || scratch_capacity < min_capacity) {
63         *result_capacity = 0;
64         return NULL;
65     }
66     if (ignore_ > 0) {
67         // Do not write ignored bytes right at the end of the buffer.
68         *result_capacity = scratch_capacity;
69         return scratch;
70     }
71     int32_t available = capacity_ - appended_;
72     if (available >= min_capacity) {
73         *result_capacity = available;
74         return buffer_ + appended_;
75     } else if (Resize(desired_capacity_hint, appended_)) {
76         *result_capacity = capacity_ - appended_;
77         return buffer_ + appended_;
78     } else {
79         *result_capacity = scratch_capacity;
80         return scratch;
81     }
82 }
83 
84 namespace {
85 
86 /**
87  * uint8_t byte buffer, similar to CharString but simpler.
88  */
89 class SortKeyLevel : public UMemory {
90 public:
SortKeyLevel()91     SortKeyLevel() : len(0), ok(TRUE) {}
~SortKeyLevel()92     ~SortKeyLevel() {}
93 
94     /** @return FALSE if memory allocation failed */
isOk() const95     UBool isOk() const { return ok; }
isEmpty() const96     UBool isEmpty() const { return len == 0; }
length() const97     int32_t length() const { return len; }
data() const98     const uint8_t *data() const { return buffer.getAlias(); }
operator [](int32_t index) const99     uint8_t operator[](int32_t index) const { return buffer[index]; }
100 
data()101     uint8_t *data() { return buffer.getAlias(); }
102 
103     void appendByte(uint32_t b);
104     void appendWeight16(uint32_t w);
105     void appendWeight32(uint32_t w);
106     void appendReverseWeight16(uint32_t w);
107 
108     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
appendTo(ByteSink & sink) const109     void appendTo(ByteSink &sink) const {
110         U_ASSERT(len > 0 && buffer[len - 1] == 1);
111         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
112     }
113 
114 private:
115     MaybeStackArray<uint8_t, 40> buffer;
116     int32_t len;
117     UBool ok;
118 
119     UBool ensureCapacity(int32_t appendCapacity);
120 
121     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
122     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
123 };
124 
appendByte(uint32_t b)125 void SortKeyLevel::appendByte(uint32_t b) {
126     if(len < buffer.getCapacity() || ensureCapacity(1)) {
127         buffer[len++] = (uint8_t)b;
128     }
129 }
130 
131 void
appendWeight16(uint32_t w)132 SortKeyLevel::appendWeight16(uint32_t w) {
133     U_ASSERT((w & 0xffff) != 0);
134     uint8_t b0 = (uint8_t)(w >> 8);
135     uint8_t b1 = (uint8_t)w;
136     int32_t appendLength = (b1 == 0) ? 1 : 2;
137     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
138         buffer[len++] = b0;
139         if(b1 != 0) {
140             buffer[len++] = b1;
141         }
142     }
143 }
144 
145 void
appendWeight32(uint32_t w)146 SortKeyLevel::appendWeight32(uint32_t w) {
147     U_ASSERT(w != 0);
148     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
149     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
150     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
151         buffer[len++] = bytes[0];
152         if(bytes[1] != 0) {
153             buffer[len++] = bytes[1];
154             if(bytes[2] != 0) {
155                 buffer[len++] = bytes[2];
156                 if(bytes[3] != 0) {
157                     buffer[len++] = bytes[3];
158                 }
159             }
160         }
161     }
162 }
163 
164 void
appendReverseWeight16(uint32_t w)165 SortKeyLevel::appendReverseWeight16(uint32_t w) {
166     U_ASSERT((w & 0xffff) != 0);
167     uint8_t b0 = (uint8_t)(w >> 8);
168     uint8_t b1 = (uint8_t)w;
169     int32_t appendLength = (b1 == 0) ? 1 : 2;
170     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
171         if(b1 == 0) {
172             buffer[len++] = b0;
173         } else {
174             buffer[len] = b1;
175             buffer[len + 1] = b0;
176             len += 2;
177         }
178     }
179 }
180 
ensureCapacity(int32_t appendCapacity)181 UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
182     if(!ok) {
183         return FALSE;
184     }
185     int32_t newCapacity = 2 * buffer.getCapacity();
186     int32_t altCapacity = len + 2 * appendCapacity;
187     if (newCapacity < altCapacity) {
188         newCapacity = altCapacity;
189     }
190     if (newCapacity < 200) {
191         newCapacity = 200;
192     }
193     if(buffer.resize(newCapacity, len)==NULL) {
194         return ok = FALSE;
195     }
196     return TRUE;
197 }
198 
199 }  // namespace
200 
~LevelCallback()201 CollationKeys::LevelCallback::~LevelCallback() {}
202 
203 UBool
needToWrite(Collation::Level)204 CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
205 
206 /**
207  * Map from collation strength (UColAttributeValue)
208  * to a mask of Collation::Level bits up to that strength,
209  * excluding the CASE_LEVEL which is independent of the strength,
210  * and excluding IDENTICAL_LEVEL which this function does not write.
211  */
212 static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
213     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
214     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
215     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
216     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
217     0, 0, 0, 0,
218     0, 0, 0, 0,
219     0, 0, 0,
220     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
221 };
222 
223 void
writeSortKeyUpToQuaternary(CollationIterator & iter,const UBool * compressibleBytes,const CollationSettings & settings,SortKeyByteSink & sink,Collation::Level minLevel,LevelCallback & callback,UBool preflight,UErrorCode & errorCode)224 CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
225                                           const UBool *compressibleBytes,
226                                           const CollationSettings &settings,
227                                           SortKeyByteSink &sink,
228                                           Collation::Level minLevel, LevelCallback &callback,
229                                           UBool preflight, UErrorCode &errorCode) {
230     if(U_FAILURE(errorCode)) { return; }
231 
232     int32_t options = settings.options;
233     // Set of levels to process and write.
234     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
235     if((options & CollationSettings::CASE_LEVEL) != 0) {
236         levels |= Collation::CASE_LEVEL_FLAG;
237     }
238     // Minus the levels below minLevel.
239     levels &= ~(((uint32_t)1 << minLevel) - 1);
240     if(levels == 0) { return; }
241 
242     uint32_t variableTop;
243     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
244         variableTop = 0;
245     } else {
246         // +1 so that we can use "<" and primary ignorables test out early.
247         variableTop = settings.variableTop + 1;
248     }
249 
250     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
251 
252     SortKeyLevel cases;
253     SortKeyLevel secondaries;
254     SortKeyLevel tertiaries;
255     SortKeyLevel quaternaries;
256 
257     uint32_t prevReorderedPrimary = 0;  // 0==no compression
258     int32_t commonCases = 0;
259     int32_t commonSecondaries = 0;
260     int32_t commonTertiaries = 0;
261     int32_t commonQuaternaries = 0;
262 
263     uint32_t prevSecondary = 0;
264     int32_t secSegmentStart = 0;
265 
266     for(;;) {
267         // No need to keep all CEs in the buffer when we write a sort key.
268         iter.clearCEsIfNoneRemaining();
269         int64_t ce = iter.nextCE(errorCode);
270         uint32_t p = (uint32_t)(ce >> 32);
271         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
272             // Variable CE, shift it to quaternary level.
273             // Ignore all following primary ignorables, and shift further variable CEs.
274             if(commonQuaternaries != 0) {
275                 --commonQuaternaries;
276                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
277                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);
278                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
279                 }
280                 // Shifted primary weights are lower than the common weight.
281                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
282                 commonQuaternaries = 0;
283             }
284             do {
285                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
286                     if(settings.hasReordering()) {
287                         p = settings.reorder(p);
288                     }
289                     if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
290                         // Prevent shifted primary lead bytes from
291                         // overlapping with the common compression range.
292                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
293                     }
294                     quaternaries.appendWeight32(p);
295                 }
296                 do {
297                     ce = iter.nextCE(errorCode);
298                     p = (uint32_t)(ce >> 32);
299                 } while(p == 0);
300             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
301         }
302         // ce could be primary ignorable, or NO_CE, or the merge separator,
303         // or a regular primary CE, but it is not variable.
304         // If ce==NO_CE, then write nothing for the primary level but
305         // terminate compression on all levels and then exit the loop.
306         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
307             // Test the un-reordered primary for compressibility.
308             UBool isCompressible = compressibleBytes[p >> 24];
309             if(settings.hasReordering()) {
310                 p = settings.reorder(p);
311             }
312             uint32_t p1 = p >> 24;
313             if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) {
314                 if(prevReorderedPrimary != 0) {
315                     if(p < prevReorderedPrimary) {
316                         // No primary compression terminator
317                         // at the end of the level or merged segment.
318                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
319                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
320                         }
321                     } else {
322                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
323                     }
324                 }
325                 sink.Append(p1);
326                 if(isCompressible) {
327                     prevReorderedPrimary = p;
328                 } else {
329                     prevReorderedPrimary = 0;
330                 }
331             }
332             char p2 = (char)(p >> 16);
333             if(p2 != 0) {
334                 char buffer[3] = { p2, (char)(p >> 8), (char)p };
335                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
336             }
337             // Optimization for internalNextSortKeyPart():
338             // When the primary level overflows we can stop because we need not
339             // calculate (preflight) the whole sort key length.
340             if(!preflight && sink.Overflowed()) {
341                 if(U_SUCCESS(errorCode) && !sink.IsOk()) {
342                     errorCode = U_MEMORY_ALLOCATION_ERROR;
343                 }
344                 return;
345             }
346         }
347 
348         uint32_t lower32 = (uint32_t)ce;
349         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
350 
351         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
352             uint32_t s = lower32 >> 16;
353             if(s == 0) {
354                 // secondary ignorable
355             } else if(s == Collation::COMMON_WEIGHT16 &&
356                     ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
357                         p != Collation::MERGE_SEPARATOR_PRIMARY)) {
358                 // s is a common secondary weight, and
359                 // backwards-secondary is off or the ce is not the merge separator.
360                 ++commonSecondaries;
361             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
362                 if(commonSecondaries != 0) {
363                     --commonSecondaries;
364                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
365                         secondaries.appendByte(SEC_COMMON_MIDDLE);
366                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
367                     }
368                     uint32_t b;
369                     if(s < Collation::COMMON_WEIGHT16) {
370                         b = SEC_COMMON_LOW + commonSecondaries;
371                     } else {
372                         b = SEC_COMMON_HIGH - commonSecondaries;
373                     }
374                     secondaries.appendByte(b);
375                     commonSecondaries = 0;
376                 }
377                 secondaries.appendWeight16(s);
378             } else {
379                 if(commonSecondaries != 0) {
380                     --commonSecondaries;
381                     // Append reverse weights. The level will be re-reversed later.
382                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
383                     uint32_t b;
384                     if(prevSecondary < Collation::COMMON_WEIGHT16) {
385                         b = SEC_COMMON_LOW + remainder;
386                     } else {
387                         b = SEC_COMMON_HIGH - remainder;
388                     }
389                     secondaries.appendByte(b);
390                     commonSecondaries -= remainder;
391                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
392                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
393                         secondaries.appendByte(SEC_COMMON_MIDDLE);
394                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
395                     }
396                     // commonSecondaries == 0
397                 }
398                 if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
399                     // The backwards secondary level compares secondary weights backwards
400                     // within segments separated by the merge separator (U+FFFE).
401                     uint8_t *secs = secondaries.data();
402                     int32_t last = secondaries.length() - 1;
403                     if(secSegmentStart < last) {
404                         uint8_t *p = secs + secSegmentStart;
405                         uint8_t *q = secs + last;
406                         do {
407                             uint8_t b = *p;
408                             *p++ = *q;
409                             *q-- = b;
410                         } while(p < q);
411                     }
412                     secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
413                         Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
414                     prevSecondary = 0;
415                     secSegmentStart = secondaries.length();
416                 } else {
417                     secondaries.appendReverseWeight16(s);
418                     prevSecondary = s;
419                 }
420             }
421         }
422 
423         if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
424             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
425                     p == 0 : lower32 <= 0xffff) {
426                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
427                 // Otherwise: Ignore case level weights of secondary ignorables.
428                 // For details see the comments in the CollationCompare class.
429             } else {
430                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
431                 U_ASSERT((c & 0xc0) != 0xc0);
432                 if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
433                     ++commonCases;
434                 } else {
435                     if((options & CollationSettings::UPPER_FIRST) == 0) {
436                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
437                         // If there are only common (=lowest) weights in the whole level,
438                         // then we need not write anything.
439                         // Level length differences are handled already on the next-higher level.
440                         if(commonCases != 0 &&
441                                 (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
442                             --commonCases;
443                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
444                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
445                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
446                             }
447                             uint32_t b;
448                             if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
449                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
450                             } else {
451                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
452                             }
453                             cases.appendByte(b << 4);
454                             commonCases = 0;
455                         }
456                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
457                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
458                         }
459                     } else {
460                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
461                         // The compressed common case weights only go up from the "low" value
462                         // because with upperFirst the common weight is the highest one.
463                         if(commonCases != 0) {
464                             --commonCases;
465                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
466                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
467                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
468                             }
469                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
470                             commonCases = 0;
471                         }
472                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
473                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
474                         }
475                     }
476                     // c is a separator byte 01,
477                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
478                     cases.appendByte(c);
479                 }
480             }
481         }
482 
483         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
484             uint32_t t = lower32 & tertiaryMask;
485             U_ASSERT((lower32 & 0xc000) != 0xc000);
486             if(t == Collation::COMMON_WEIGHT16) {
487                 ++commonTertiaries;
488             } else if((tertiaryMask & 0x8000) == 0) {
489                 // Tertiary weights without case bits.
490                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.
491                 if(commonTertiaries != 0) {
492                     --commonTertiaries;
493                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
494                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
495                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
496                     }
497                     uint32_t b;
498                     if(t < Collation::COMMON_WEIGHT16) {
499                         b = TER_ONLY_COMMON_LOW + commonTertiaries;
500                     } else {
501                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;
502                     }
503                     tertiaries.appendByte(b);
504                     commonTertiaries = 0;
505                 }
506                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
507                 tertiaries.appendWeight16(t);
508             } else if((options & CollationSettings::UPPER_FIRST) == 0) {
509                 // Tertiary weights with caseFirst=lowerFirst.
510                 // Move lead bytes 06..BF to 46..FF for the common-weight range.
511                 if(commonTertiaries != 0) {
512                     --commonTertiaries;
513                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
514                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
515                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
516                     }
517                     uint32_t b;
518                     if(t < Collation::COMMON_WEIGHT16) {
519                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
520                     } else {
521                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
522                     }
523                     tertiaries.appendByte(b);
524                     commonTertiaries = 0;
525                 }
526                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
527                 tertiaries.appendWeight16(t);
528             } else {
529                 // Tertiary weights with caseFirst=upperFirst.
530                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
531                 // to keep tertiary CEs well-formed.
532                 // Their case+tertiary weights must be greater than those of
533                 // primary and secondary CEs.
534                 //
535                 // Separator         01 -> 01      (unchanged)
536                 // Lowercase     02..04 -> 82..84  (includes uncased)
537                 // Common weight     05 -> 85..C5  (common-weight compression range)
538                 // Lowercase     06..3F -> C6..FF
539                 // Mixed case    42..7F -> 42..7F
540                 // Uppercase     82..BF -> 02..3F
541                 // Tertiary CE   86..BF -> C6..FF
542                 if(t <= Collation::NO_CE_WEIGHT16) {
543                     // Keep separators unchanged.
544                 } else if(lower32 > 0xffff) {
545                     // Invert case bits of primary & secondary CEs.
546                     t ^= 0xc000;
547                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
548                         t -= 0x4000;
549                     }
550                 } else {
551                     // Keep uppercase bits of tertiary CEs.
552                     U_ASSERT(0x8600 <= t && t <= 0xbfff);
553                     t += 0x4000;
554                 }
555                 if(commonTertiaries != 0) {
556                     --commonTertiaries;
557                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
558                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
559                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
560                     }
561                     uint32_t b;
562                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
563                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
564                     } else {
565                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
566                     }
567                     tertiaries.appendByte(b);
568                     commonTertiaries = 0;
569                 }
570                 tertiaries.appendWeight16(t);
571             }
572         }
573 
574         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
575             uint32_t q = lower32 & 0xffff;
576             if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
577                 ++commonQuaternaries;
578             } else if(q == Collation::NO_CE_WEIGHT16 &&
579                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&
580                     quaternaries.isEmpty()) {
581                 // If alternate=non-ignorable and there are only common quaternary weights,
582                 // then we need not write anything.
583                 // The only weights greater than the merge separator and less than the common weight
584                 // are shifted primary weights, which are not generated for alternate=non-ignorable.
585                 // There are also exactly as many quaternary weights as tertiary weights,
586                 // so level length differences are handled already on tertiary level.
587                 // Any above-common quaternary weight will compare greater regardless.
588                 quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
589             } else {
590                 if(q == Collation::NO_CE_WEIGHT16) {
591                     q = Collation::LEVEL_SEPARATOR_BYTE;
592                 } else {
593                     q = 0xfc + ((q >> 6) & 3);
594                 }
595                 if(commonQuaternaries != 0) {
596                     --commonQuaternaries;
597                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
598                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
599                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
600                     }
601                     uint32_t b;
602                     if(q < QUAT_COMMON_LOW) {
603                         b = QUAT_COMMON_LOW + commonQuaternaries;
604                     } else {
605                         b = QUAT_COMMON_HIGH - commonQuaternaries;
606                     }
607                     quaternaries.appendByte(b);
608                     commonQuaternaries = 0;
609                 }
610                 quaternaries.appendByte(q);
611             }
612         }
613 
614         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
615     }
616 
617     if(U_FAILURE(errorCode)) { return; }
618 
619     // Append the beyond-primary levels.
620     UBool ok = TRUE;
621     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
622         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
623         ok &= secondaries.isOk();
624         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
625         secondaries.appendTo(sink);
626     }
627 
628     if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
629         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
630         ok &= cases.isOk();
631         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
632         // Write pairs of nibbles as bytes, except separator bytes as themselves.
633         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
634         uint8_t b = 0;
635         for(int32_t i = 0; i < length; ++i) {
636             uint8_t c = (uint8_t)cases[i];
637             U_ASSERT((c & 0xf) == 0 && c != 0);
638             if(b == 0) {
639                 b = c;
640             } else {
641                 sink.Append(b | (c >> 4));
642                 b = 0;
643             }
644         }
645         if(b != 0) {
646             sink.Append(b);
647         }
648     }
649 
650     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
651         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
652         ok &= tertiaries.isOk();
653         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
654         tertiaries.appendTo(sink);
655     }
656 
657     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
658         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
659         ok &= quaternaries.isOk();
660         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
661         quaternaries.appendTo(sink);
662     }
663 
664     if(!ok || !sink.IsOk()) {
665         errorCode = U_MEMORY_ALLOCATION_ERROR;
666     }
667 }
668 
669 U_NAMESPACE_END
670 
671 #endif  // !UCONFIG_NO_COLLATION
672