• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * collationkeys.cpp
7 *
8 * created on: 2012sep02
9 * created by: Markus W. Scherer
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION
15 
16 #include "unicode/bytestream.h"
17 #include "collation.h"
18 #include "collationiterator.h"
19 #include "collationkeys.h"
20 #include "collationsettings.h"
21 #include "uassert.h"
22 
23 U_NAMESPACE_BEGIN
24 
~SortKeyByteSink()25 SortKeyByteSink::~SortKeyByteSink() {}
26 
27 void
Append(const char * bytes,int32_t n)28 SortKeyByteSink::Append(const char *bytes, int32_t n) {
29     if (n <= 0 || bytes == NULL) {
30         return;
31     }
32     if (ignore_ > 0) {
33         int32_t ignoreRest = ignore_ - n;
34         if (ignoreRest >= 0) {
35             ignore_ = ignoreRest;
36             return;
37         } else {
38             bytes += ignore_;
39             n = -ignoreRest;
40             ignore_ = 0;
41         }
42     }
43     int32_t length = appended_;
44     appended_ += n;
45     if ((buffer_ + length) == bytes) {
46         return;  // the caller used GetAppendBuffer() and wrote the bytes already
47     }
48     int32_t available = capacity_ - length;
49     if (n <= available) {
50         uprv_memcpy(buffer_ + length, bytes, n);
51     } else {
52         AppendBeyondCapacity(bytes, n, length);
53     }
54 }
55 
56 char *
GetAppendBuffer(int32_t min_capacity,int32_t desired_capacity_hint,char * scratch,int32_t scratch_capacity,int32_t * result_capacity)57 SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
58                                  int32_t desired_capacity_hint,
59                                  char *scratch,
60                                  int32_t scratch_capacity,
61                                  int32_t *result_capacity) {
62     if (min_capacity < 1 || scratch_capacity < min_capacity) {
63         *result_capacity = 0;
64         return NULL;
65     }
66     if (ignore_ > 0) {
67         // Do not write ignored bytes right at the end of the buffer.
68         *result_capacity = scratch_capacity;
69         return scratch;
70     }
71     int32_t available = capacity_ - appended_;
72     if (available >= min_capacity) {
73         *result_capacity = available;
74         return buffer_ + appended_;
75     } else if (Resize(desired_capacity_hint, appended_)) {
76         *result_capacity = capacity_ - appended_;
77         return buffer_ + appended_;
78     } else {
79         *result_capacity = scratch_capacity;
80         return scratch;
81     }
82 }
83 
84 namespace {
85 
86 /**
87  * uint8_t byte buffer, similar to CharString but simpler.
88  */
89 class SortKeyLevel : public UMemory {
90 public:
SortKeyLevel()91     SortKeyLevel() : len(0), ok(TRUE) {}
~SortKeyLevel()92     ~SortKeyLevel() {}
93 
94     /** @return FALSE if memory allocation failed */
isOk() const95     UBool isOk() const { return ok; }
isEmpty() const96     UBool isEmpty() const { return len == 0; }
length() const97     int32_t length() const { return len; }
data() const98     const uint8_t *data() const { return buffer.getAlias(); }
operator [](int32_t index) const99     uint8_t operator[](int32_t index) const { return buffer[index]; }
100 
data()101     uint8_t *data() { return buffer.getAlias(); }
102 
103     void appendByte(uint32_t b);
104     void appendWeight16(uint32_t w);
105     void appendWeight32(uint32_t w);
106     void appendReverseWeight16(uint32_t w);
107 
108     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
appendTo(ByteSink & sink) const109     void appendTo(ByteSink &sink) const {
110         U_ASSERT(len > 0 && buffer[len - 1] == 1);
111         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
112     }
113 
114 private:
115     MaybeStackArray<uint8_t, 40> buffer;
116     int32_t len;
117     UBool ok;
118 
119     UBool ensureCapacity(int32_t appendCapacity);
120 
121     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
122     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
123 };
124 
appendByte(uint32_t b)125 void SortKeyLevel::appendByte(uint32_t b) {
126     if(len < buffer.getCapacity() || ensureCapacity(1)) {
127         buffer[len++] = (uint8_t)b;
128     }
129 }
130 
131 void
appendWeight16(uint32_t w)132 SortKeyLevel::appendWeight16(uint32_t w) {
133     U_ASSERT((w & 0xffff) != 0);
134     uint8_t b0 = (uint8_t)(w >> 8);
135     uint8_t b1 = (uint8_t)w;
136     int32_t appendLength = (b1 == 0) ? 1 : 2;
137     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
138         buffer[len++] = b0;
139         if(b1 != 0) {
140             buffer[len++] = b1;
141         }
142     }
143 }
144 
145 void
appendWeight32(uint32_t w)146 SortKeyLevel::appendWeight32(uint32_t w) {
147     U_ASSERT(w != 0);
148     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
149     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
150     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
151         buffer[len++] = bytes[0];
152         if(bytes[1] != 0) {
153             buffer[len++] = bytes[1];
154             if(bytes[2] != 0) {
155                 buffer[len++] = bytes[2];
156                 if(bytes[3] != 0) {
157                     buffer[len++] = bytes[3];
158                 }
159             }
160         }
161     }
162 }
163 
164 void
appendReverseWeight16(uint32_t w)165 SortKeyLevel::appendReverseWeight16(uint32_t w) {
166     U_ASSERT((w & 0xffff) != 0);
167     uint8_t b0 = (uint8_t)(w >> 8);
168     uint8_t b1 = (uint8_t)w;
169     int32_t appendLength = (b1 == 0) ? 1 : 2;
170     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
171         if(b1 == 0) {
172             buffer[len++] = b0;
173         } else {
174             buffer[len] = b1;
175             buffer[len + 1] = b0;
176             len += 2;
177         }
178     }
179 }
180 
ensureCapacity(int32_t appendCapacity)181 UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
182     if(!ok) {
183         return FALSE;
184     }
185     int32_t newCapacity = 2 * buffer.getCapacity();
186     int32_t altCapacity = len + 2 * appendCapacity;
187     if (newCapacity < altCapacity) {
188         newCapacity = altCapacity;
189     }
190     if (newCapacity < 200) {
191         newCapacity = 200;
192     }
193     if(buffer.resize(newCapacity, len)==NULL) {
194         return ok = FALSE;
195     }
196     return TRUE;
197 }
198 
199 }  // namespace
200 
~LevelCallback()201 CollationKeys::LevelCallback::~LevelCallback() {}
202 
203 UBool
needToWrite(Collation::Level)204 CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
205 
206 /**
207  * Map from collation strength (UColAttributeValue)
208  * to a mask of Collation::Level bits up to that strength,
209  * excluding the CASE_LEVEL which is independent of the strength,
210  * and excluding IDENTICAL_LEVEL which this function does not write.
211  */
212 static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
213     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
214     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
215     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
216     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
217     0, 0, 0, 0,
218     0, 0, 0, 0,
219     0, 0, 0,
220     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
221 };
222 
/**
 * Writes the sort key bytes for the primary through quaternary levels.
 *
 * Collation elements are pulled from iter one at a time until the
 * terminating NO_CE has been processed. Primary weight bytes are appended
 * to the sink immediately. The secondary, case, tertiary and quaternary
 * weights are collected in per-level buffers, with runs of common weights
 * compressed, and are appended to the sink after the loop, each level
 * preceded by Collation::LEVEL_SEPARATOR_BYTE.
 *
 * @param iter              supplies the collation elements via nextCE()
 * @param compressibleBytes indexed by the un-reordered primary lead byte;
 *                          non-zero entries enable primary lead byte compression
 * @param settings          provides options, variableTop and reorderTable
 * @param sink              receives the sort key bytes
 * @param minLevel          levels below this one are not processed or written
 * @param callback          needToWrite() is asked before each of the secondary,
 *                          case, tertiary and quaternary levels is appended;
 *                          a FALSE result makes this function return at once
 * @param preflight         if FALSE, the function returns early as soon as the
 *                          sink reports Overflowed() after primary bytes were written
 * @param errorCode         nothing is done if it already indicates a failure;
 *                          set to U_MEMORY_ALLOCATION_ERROR when a level buffer
 *                          could not grow or the sink is not ok
 */
223 void
writeSortKeyUpToQuaternary(CollationIterator & iter,const UBool * compressibleBytes,const CollationSettings & settings,SortKeyByteSink & sink,Collation::Level minLevel,LevelCallback & callback,UBool preflight,UErrorCode & errorCode)224 CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
225                                           const UBool *compressibleBytes,
226                                           const CollationSettings &settings,
227                                           SortKeyByteSink &sink,
228                                           Collation::Level minLevel, LevelCallback &callback,
229                                           UBool preflight, UErrorCode &errorCode) {
230     if(U_FAILURE(errorCode)) { return; }
231 
232     int32_t options = settings.options;
233     // Set of levels to process and write.
234     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
235     if((options & CollationSettings::CASE_LEVEL) != 0) {
236         levels |= Collation::CASE_LEVEL_FLAG;
237     }
238     // Minus the levels below minLevel.
239     levels &= ~(((uint32_t)1 << minLevel) - 1);
240     if(levels == 0) { return; }
241 
242     uint32_t variableTop;
243     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
244         variableTop = 0;
245     } else {
246         // +1 so that we can use "<" and primary ignorables test out early.
247         variableTop = settings.variableTop + 1;
248     }
249     const uint8_t *reorderTable = settings.reorderTable;
250 
251     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
252 
        // Buffers for the levels after the primary level.
        // Primary bytes are not buffered; they go straight to the sink inside the loop.
253     SortKeyLevel cases;
254     SortKeyLevel secondaries;
255     SortKeyLevel tertiaries;
256     SortKeyLevel quaternaries;
257 
258     uint32_t compressedP1 = 0;  // 0==no compression; otherwise reordered compressible lead byte
        // Per-level counts of consecutive common weights that have been seen
        // but not yet written to the level buffers.
259     int32_t commonCases = 0;
260     int32_t commonSecondaries = 0;
261     int32_t commonTertiaries = 0;
262     int32_t commonQuaternaries = 0;
263 
        // prevSecondary and anyMergeSeparators are only updated on the
        // backward-secondary path below.
264     uint32_t prevSecondary = 0;
265     UBool anyMergeSeparators = FALSE;
266 
267     for(;;) {
268         // No need to keep all CEs in the buffer when we write a sort key.
269         iter.clearCEsIfNoneRemaining();
270         int64_t ce = iter.nextCE(errorCode);
271         uint32_t p = (uint32_t)(ce >> 32);
272         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
273             // Variable CE, shift it to quaternary level.
274             // Ignore all following primary ignorables, and shift further variable CEs.
275             if(commonQuaternaries != 0) {
276                 --commonQuaternaries;
277                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
278                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);
279                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
280                 }
281                 // Shifted primary weights are lower than the common weight.
282                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
283                 commonQuaternaries = 0;
284             }
285             do {
286                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
287                     uint32_t p1 = p >> 24;
288                     if(reorderTable != NULL) { p1 = reorderTable[p1]; }
289                     if(p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
290                         // Prevent shifted primary lead bytes from
291                         // overlapping with the common compression range.
292                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
293                     }
294                     quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
295                 }
296                 do {
297                     ce = iter.nextCE(errorCode);
298                     p = (uint32_t)(ce >> 32);
299                 } while(p == 0);
300             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
301         }
302         // ce could be primary ignorable, or NO_CE, or the merge separator,
303         // or a regular primary CE, but it is not variable.
304         // If ce==NO_CE, then write nothing for the primary level but
305         // terminate compression on all levels and then exit the loop.
306         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
307             uint32_t p1 = p >> 24;
308             if(reorderTable != NULL) { p1 = reorderTable[p1]; }
309             if(p1 != compressedP1) {
310                 if(compressedP1 != 0) {
311                     if(p1 < compressedP1) {
312                         // No primary compression terminator
313                         // at the end of the level or merged segment.
314                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
315                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
316                         }
317                     } else {
318                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
319                     }
320                 }
321                 sink.Append(p1);
322                 // Test the un-reordered lead byte for compressibility but
323                 // remember the reordered lead byte.
324                 if(compressibleBytes[p >> 24]) {
325                     compressedP1 = p1;
326                 } else {
327                     compressedP1 = 0;
328                 }
329             }
                // Write the remaining primary bytes, stopping before the first zero byte.
330             char p2 = (char)(p >> 16);
331             if(p2 != 0) {
332                 char buffer[3] = { p2, (char)(p >> 8), (char)p };
333                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
334             }
335             // Optimization for internalNextSortKeyPart():
336             // When the primary level overflows we can stop because we need not
337             // calculate (preflight) the whole sort key length.
338             if(!preflight && sink.Overflowed()) {
339                 if(U_SUCCESS(errorCode) && !sink.IsOk()) {
340                     errorCode = U_MEMORY_ALLOCATION_ERROR;
341                 }
342                 return;
343             }
344         }
345 
346         uint32_t lower32 = (uint32_t)ce;
347         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
348 
349         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
350             uint32_t s = lower32 >> 16;
351             if(s == 0) {
352                 // secondary ignorable
353             } else if(s == Collation::COMMON_WEIGHT16) {
354                 ++commonSecondaries;
355             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
356                 if(commonSecondaries != 0) {
357                     --commonSecondaries;
358                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
359                         secondaries.appendByte(SEC_COMMON_MIDDLE);
360                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
361                     }
362                     uint32_t b;
363                     if(s < Collation::COMMON_WEIGHT16) {
364                         b = SEC_COMMON_LOW + commonSecondaries;
365                     } else {
366                         b = SEC_COMMON_HIGH - commonSecondaries;
367                     }
368                     secondaries.appendByte(b);
369                     commonSecondaries = 0;
370                 }
371                 secondaries.appendWeight16(s);
372             } else {
373                 if(commonSecondaries != 0) {
374                     --commonSecondaries;
375                     // Append reverse weights. The level will be re-reversed later.
376                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
377                     uint32_t b;
378                     if(prevSecondary < Collation::COMMON_WEIGHT16) {
379                         b = SEC_COMMON_LOW + remainder;
380                     } else {
381                         b = SEC_COMMON_HIGH - remainder;
382                     }
383                     secondaries.appendByte(b);
384                     commonSecondaries -= remainder;
385                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
386                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
387                         secondaries.appendByte(SEC_COMMON_MIDDLE);
388                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
389                     }
390                     // commonSecondaries == 0
391                 }
392                 // Reduce separators so that we can look for byte<=1 later.
393                 if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
394                     if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
395                         anyMergeSeparators = TRUE;
396                     }
397                     secondaries.appendByte((s >> 8) - 1);
398                 } else {
399                     secondaries.appendReverseWeight16(s);
400                 }
401                 prevSecondary = s;
402             }
403         }
404 
405         if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
406             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
407                     p == 0 : lower32 <= 0xffff) {
408                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
409                 // Otherwise: Ignore case level weights of secondary ignorables.
410                 // For details see the comments in the CollationCompare class.
411             } else {
412                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
413                 U_ASSERT((c & 0xc0) != 0xc0);
414                 if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
415                     ++commonCases;
416                 } else {
417                     if((options & CollationSettings::UPPER_FIRST) == 0) {
418                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
419                         if(commonCases != 0) {
420                             --commonCases;
421                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
422                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
423                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
424                             }
425                             uint32_t b;
426                             if(c <= Collation::MERGE_SEPARATOR_BYTE) {
427                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
428                             } else {
429                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
430                             }
431                             cases.appendByte(b << 4);
432                             commonCases = 0;
433                         }
434                         if(c > Collation::MERGE_SEPARATOR_BYTE) {
435                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
436                         }
437                     } else {
438                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
439                         // The compressed common case weights only go up from the "low" value
440                         // because with upperFirst the common weight is the highest one.
441                         if(commonCases != 0) {
442                             --commonCases;
443                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
444                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
445                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
446                             }
447                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
448                             commonCases = 0;
449                         }
450                         if(c > Collation::MERGE_SEPARATOR_BYTE) {
451                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
452                         }
453                     }
454                     // c is a separator byte 01 or 02,
455                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
456                     cases.appendByte(c);
457                 }
458             }
459         }
460 
461         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
462             uint32_t t = lower32 & tertiaryMask;
463             U_ASSERT((lower32 & 0xc000) != 0xc000);
464             if(t == Collation::COMMON_WEIGHT16) {
465                 ++commonTertiaries;
466             } else if((tertiaryMask & 0x8000) == 0) {
467                 // Tertiary weights without case bits.
468                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.
469                 if(commonTertiaries != 0) {
470                     --commonTertiaries;
471                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
472                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
473                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
474                     }
475                     uint32_t b;
476                     if(t < Collation::COMMON_WEIGHT16) {
477                         b = TER_ONLY_COMMON_LOW + commonTertiaries;
478                     } else {
479                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;
480                     }
481                     tertiaries.appendByte(b);
482                     commonTertiaries = 0;
483                 }
484                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
485                 tertiaries.appendWeight16(t);
486             } else if((options & CollationSettings::UPPER_FIRST) == 0) {
487                 // Tertiary weights with caseFirst=lowerFirst.
488                 // Move lead bytes 06..BF to 46..FF for the common-weight range.
489                 if(commonTertiaries != 0) {
490                     --commonTertiaries;
491                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
492                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
493                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
494                     }
495                     uint32_t b;
496                     if(t < Collation::COMMON_WEIGHT16) {
497                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
498                     } else {
499                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
500                     }
501                     tertiaries.appendByte(b);
502                     commonTertiaries = 0;
503                 }
504                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
505                 tertiaries.appendWeight16(t);
506             } else {
507                 // Tertiary weights with caseFirst=upperFirst.
508                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
509                 // to keep tertiary CEs well-formed.
510                 // Their case+tertiary weights must be greater than those of
511                 // primary and secondary CEs.
512                 //
513                 // Separators    01..02 -> 01..02  (unchanged)
514                 // Lowercase     03..04 -> 83..84  (includes uncased)
515                 // Common weight     05 -> 85..C5  (common-weight compression range)
516                 // Lowercase     06..3F -> C6..FF
517                 // Mixed case    43..7F -> 43..7F
518                 // Uppercase     83..BF -> 03..3F
519                 // Tertiary CE   86..BF -> C6..FF
520                 if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
521                     // Keep separators unchanged.
522                 } else if(lower32 > 0xffff) {
523                     // Invert case bits of primary & secondary CEs.
524                     t ^= 0xc000;
525                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
526                         t -= 0x4000;
527                     }
528                 } else {
529                     // Keep uppercase bits of tertiary CEs.
530                     U_ASSERT(0x8600 <= t && t <= 0xbfff);
531                     t += 0x4000;
532                 }
533                 if(commonTertiaries != 0) {
534                     --commonTertiaries;
535                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
536                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
537                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
538                     }
539                     uint32_t b;
540                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
541                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
542                     } else {
543                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
544                     }
545                     tertiaries.appendByte(b);
546                     commonTertiaries = 0;
547                 }
548                 tertiaries.appendWeight16(t);
549             }
550         }
551 
552         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
553             uint32_t q = lower32 & 0xffff;
554             if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
555                 ++commonQuaternaries;
556             } else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
557                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&
558                     (quaternaries.isEmpty() ||
559                         quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
560                 // If alternate=non-ignorable and there are only
561                 // common quaternary weights between two separators,
562                 // then we need not write anything between these separators.
563                 // The only weights greater than the merge separator and less than the common weight
564                 // are shifted primary weights, which are not generated for alternate=non-ignorable.
565                 // There are also exactly as many quaternary weights as tertiary weights,
566                 // so level length differences are handled already on tertiary level.
567                 // Any above-common quaternary weight will compare greater regardless.
568                 quaternaries.appendByte(q >> 8);
569             } else {
570                 if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
571                     q >>= 8;
572                 } else {
573                     q = 0xfc + ((q >> 6) & 3);
574                 }
575                 if(commonQuaternaries != 0) {
576                     --commonQuaternaries;
577                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
578                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
579                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
580                     }
581                     uint32_t b;
582                     if(q < QUAT_COMMON_LOW) {
583                         b = QUAT_COMMON_LOW + commonQuaternaries;
584                     } else {
585                         b = QUAT_COMMON_HIGH - commonQuaternaries;
586                     }
587                     quaternaries.appendByte(b);
588                     commonQuaternaries = 0;
589                 }
590                 quaternaries.appendByte(q);
591             }
592         }
593 
594         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
595     }
596 
597     if(U_FAILURE(errorCode)) { return; }
598 
599     // Append the beyond-primary levels.
        // ok collects the isOk() state of every level buffer that is written below.
600     UBool ok = TRUE;
601     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
602         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
603         ok &= secondaries.isOk();
604         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
605         uint8_t *secs = secondaries.data();
606         int32_t length = secondaries.length() - 1;  // Ignore the trailing NO_CE.
607         if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
608             // The backwards secondary level compares secondary weights backwards
609             // within segments separated by the merge separator (U+FFFE, weight 02).
610             // The separator weights 01 & 02 were reduced to 00 & 01 so that
611             // we do not accidentally separate at a _second_ weight byte of 02.
                // NOTE(review): the scan below stops only at a byte <= 1 and the outer
                // loop ends only at a 00 byte in secs. SortKeyLevel drops appended bytes
                // once growing its buffer has failed, so if secondaries.isOk() is FALSE
                // the terminating byte may be missing -- confirm this cannot read
                // past the end of the buffer.
612             int32_t start = 0;
613             for(;;) {
614                 // Find the merge separator or the NO_CE terminator.
615                 int32_t limit;
616                 if(anyMergeSeparators) {
617                     limit = start;
618                     while(secs[limit] > 1) { ++limit; }
619                 } else {
620                     limit = length;
621                 }
622                 // Reverse this segment.
623                 if(start < limit) {
624                     uint8_t *p = secs + start;
625                     uint8_t *q = secs + limit - 1;
626                     while(p < q) {
627                         uint8_t s = *p;
628                         *p++ = *q;
629                         *q-- = s;
630                     }
631                 }
632                 // Did we reach the end of the string?
633                 if(secs[limit] == 0) { break; }
634                 // Restore the merge separator.
635                 secs[limit] = 2;
636                 // Skip the merge separator and continue.
637                 start = limit + 1;
638             }
639         }
640         sink.Append(reinterpret_cast<char *>(secs), length);
641     }
642 
643     if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
644         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
645         ok &= cases.isOk();
646         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
647         // Write pairs of nibbles as bytes, except separator bytes as themselves.
648         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
            // b holds a pending high nibble that still waits for its low-nibble partner.
649         uint8_t b = 0;
650         for(int32_t i = 0; i < length; ++i) {
651             uint8_t c = (uint8_t)cases[i];
652             if(c <= Collation::MERGE_SEPARATOR_BYTE) {
653                 U_ASSERT(c != 0);
654                 if(b != 0) {
655                     sink.Append(b);
656                     b = 0;
657                 }
658                 sink.Append(c);
659             } else {
660                 U_ASSERT((c & 0xf) == 0);
661                 if(b == 0) {
662                     b = c;
663                 } else {
664                     sink.Append(b | (c >> 4));
665                     b = 0;
666                 }
667             }
668         }
669         if(b != 0) {
670             sink.Append(b);
671         }
672     }
673 
674     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
675         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
676         ok &= tertiaries.isOk();
677         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
678         tertiaries.appendTo(sink);
679     }
680 
681     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
682         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
683         ok &= quaternaries.isOk();
684         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
685         quaternaries.appendTo(sink);
686     }
687 
        // A level buffer that could not grow, or a sink that is not ok,
        // is reported as a memory allocation failure.
688     if(!ok || !sink.IsOk()) {
689         errorCode = U_MEMORY_ALLOCATION_ERROR;
690     }
691 }
692 
693 U_NAMESPACE_END
694 
695 #endif  // !UCONFIG_NO_COLLATION
696