1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2013-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * collationsettings.cpp
9 *
10 * created on: 2013feb07
11 * created by: Markus W. Scherer
12 */
13
14 #include "unicode/utypes.h"
15
16 #if !UCONFIG_NO_COLLATION
17
18 #include "unicode/ucol.h"
19 #include "cmemory.h"
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "collationsettings.h"
23 #include "sharedobject.h"
24 #include "uassert.h"
25 #include "umutex.h"
26 #include "uvectr32.h"
27
28 U_NAMESPACE_BEGIN
29
/**
 * Copy constructor.
 * The scalar settings are copied in the initializer list, and the reordering
 * members start out empty (NULL pointers, zero lengths, zero capacity).
 * copyReorderingFrom() then fills in the reordering state from the other object.
 * A constructor cannot return an error code, so a failure reported by
 * copyReorderingFrom() is not propagated to the caller.
 */
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options), variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL), reorderRangesLength(0),
          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // The fast-Latin primaries are copied only when fastLatinOptions is not negative.
    if(fastLatinOptions >= 0) {
        uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
    }
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);
}
44
~CollationSettings()45 CollationSettings::~CollationSettings() {
46 if(reorderCodesCapacity != 0) {
47 uprv_free(const_cast<int32_t *>(reorderCodes));
48 }
49 }
50
51 UBool
operator ==(const CollationSettings & other) const52 CollationSettings::operator==(const CollationSettings &other) const {
53 if(options != other.options) { return FALSE; }
54 if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
55 if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
56 for(int32_t i = 0; i < reorderCodesLength; ++i) {
57 if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
58 }
59 return TRUE;
60 }
61
62 int32_t
hashCode() const63 CollationSettings::hashCode() const {
64 int32_t h = options << 8;
65 if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
66 h ^= reorderCodesLength;
67 for(int32_t i = 0; i < reorderCodesLength; ++i) {
68 h ^= (reorderCodes[i] << i);
69 }
70 return h;
71 }
72
73 void
resetReordering()74 CollationSettings::resetReordering() {
75 // When we turn off reordering, we want to set a NULL permutation
76 // rather than a no-op permutation.
77 // Keep the memory via reorderCodes and its capacity.
78 reorderTable = NULL;
79 minHighNoReorder = 0;
80 reorderRangesLength = 0;
81 reorderCodesLength = 0;
82 }
83
84 void
aliasReordering(const CollationData & data,const int32_t * codes,int32_t length,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)85 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
86 const uint32_t *ranges, int32_t rangesLength,
87 const uint8_t *table, UErrorCode &errorCode) {
88 if(U_FAILURE(errorCode)) { return; }
89 if(table != NULL &&
90 (rangesLength == 0 ?
91 !reorderTableHasSplitBytes(table) :
92 rangesLength >= 2 &&
93 // The first offset must be 0. The last offset must not be 0.
94 (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
95 // We need to release the memory before setting the alias pointer.
96 if(reorderCodesCapacity != 0) {
97 uprv_free(const_cast<int32_t *>(reorderCodes));
98 reorderCodesCapacity = 0;
99 }
100 reorderTable = table;
101 reorderCodes = codes;
102 reorderCodesLength = length;
103 // Drop ranges before the first split byte. They are reordered by the table.
104 // This then speeds up reordering of the remaining ranges.
105 int32_t firstSplitByteRangeIndex = 0;
106 while(firstSplitByteRangeIndex < rangesLength &&
107 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
108 // The second byte of the primary limit is 0.
109 ++firstSplitByteRangeIndex;
110 }
111 if(firstSplitByteRangeIndex == rangesLength) {
112 U_ASSERT(!reorderTableHasSplitBytes(table));
113 minHighNoReorder = 0;
114 reorderRanges = NULL;
115 reorderRangesLength = 0;
116 } else {
117 U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
118 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
119 reorderRanges = ranges + firstSplitByteRangeIndex;
120 reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
121 }
122 return;
123 }
124 // Regenerate missing data.
125 setReordering(data, codes, length, errorCode);
126 }
127
128 void
setReordering(const CollationData & data,const int32_t * codes,int32_t codesLength,UErrorCode & errorCode)129 CollationSettings::setReordering(const CollationData &data,
130 const int32_t *codes, int32_t codesLength,
131 UErrorCode &errorCode) {
132 if(U_FAILURE(errorCode)) { return; }
133 if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
134 resetReordering();
135 return;
136 }
137 UVector32 rangesList(errorCode);
138 data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
139 if(U_FAILURE(errorCode)) { return; }
140 int32_t rangesLength = rangesList.size();
141 if(rangesLength == 0) {
142 resetReordering();
143 return;
144 }
145 const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
146 // ranges[] contains at least two (limit, offset) pairs.
147 // The first offset must be 0. The last offset must not be 0.
148 // Separators (at the low end) and trailing weights (at the high end)
149 // are never reordered.
150 U_ASSERT(rangesLength >= 2);
151 U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
152 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
153
154 // Write the lead byte permutation table.
155 // Set a 0 for each lead byte that has a range boundary in the middle.
156 uint8_t table[256];
157 int32_t b = 0;
158 int32_t firstSplitByteRangeIndex = -1;
159 for(int32_t i = 0; i < rangesLength; ++i) {
160 uint32_t pair = ranges[i];
161 int32_t limit1 = (int32_t)(pair >> 24);
162 while(b < limit1) {
163 table[b] = (uint8_t)(b + pair);
164 ++b;
165 }
166 // Check the second byte of the limit.
167 if((pair & 0xff0000) != 0) {
168 table[limit1] = 0;
169 b = limit1 + 1;
170 if(firstSplitByteRangeIndex < 0) {
171 firstSplitByteRangeIndex = i;
172 }
173 }
174 }
175 while(b <= 0xff) {
176 table[b] = (uint8_t)b;
177 ++b;
178 }
179 if(firstSplitByteRangeIndex < 0) {
180 // The lead byte permutation table alone suffices for reordering.
181 rangesLength = 0;
182 } else {
183 // Remove the ranges below the first split byte.
184 ranges += firstSplitByteRangeIndex;
185 rangesLength -= firstSplitByteRangeIndex;
186 }
187 setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
188 }
189
190 void
setReorderArrays(const int32_t * codes,int32_t codesLength,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)191 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
192 const uint32_t *ranges, int32_t rangesLength,
193 const uint8_t *table, UErrorCode &errorCode) {
194 if(U_FAILURE(errorCode)) { return; }
195 int32_t *ownedCodes;
196 int32_t totalLength = codesLength + rangesLength;
197 U_ASSERT(totalLength > 0);
198 if(totalLength <= reorderCodesCapacity) {
199 ownedCodes = const_cast<int32_t *>(reorderCodes);
200 } else {
201 // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
202 int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
203 ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
204 if(ownedCodes == NULL) {
205 resetReordering();
206 errorCode = U_MEMORY_ALLOCATION_ERROR;
207 return;
208 }
209 if(reorderCodesCapacity != 0) {
210 uprv_free(const_cast<int32_t *>(reorderCodes));
211 }
212 reorderCodes = ownedCodes;
213 reorderCodesCapacity = capacity;
214 }
215 uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
216 uprv_memcpy(ownedCodes, codes, codesLength * 4);
217 uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
218 reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
219 reorderCodesLength = codesLength;
220 reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
221 reorderRangesLength = rangesLength;
222 }
223
224 void
copyReorderingFrom(const CollationSettings & other,UErrorCode & errorCode)225 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
226 if(U_FAILURE(errorCode)) { return; }
227 if(!other.hasReordering()) {
228 resetReordering();
229 return;
230 }
231 minHighNoReorder = other.minHighNoReorder;
232 if(other.reorderCodesCapacity == 0) {
233 // The reorder arrays are aliased to memory-mapped data.
234 reorderTable = other.reorderTable;
235 reorderRanges = other.reorderRanges;
236 reorderRangesLength = other.reorderRangesLength;
237 reorderCodes = other.reorderCodes;
238 reorderCodesLength = other.reorderCodesLength;
239 } else {
240 setReorderArrays(other.reorderCodes, other.reorderCodesLength,
241 other.reorderRanges, other.reorderRangesLength,
242 other.reorderTable, errorCode);
243 }
244 }
245
246 UBool
reorderTableHasSplitBytes(const uint8_t table[256])247 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
248 U_ASSERT(table[0] == 0);
249 for(int32_t i = 1; i < 256; ++i) {
250 if(table[i] == 0) {
251 return TRUE;
252 }
253 }
254 return FALSE;
255 }
256
257 uint32_t
reorderEx(uint32_t p) const258 CollationSettings::reorderEx(uint32_t p) const {
259 if(p >= minHighNoReorder) { return p; }
260 // Round up p so that its lower 16 bits are >= any offset bits.
261 // Then compare q directly with (limit, offset) pairs.
262 uint32_t q = p | 0xffff;
263 uint32_t r;
264 const uint32_t *ranges = reorderRanges;
265 while(q >= (r = *ranges)) { ++ranges; }
266 return p + (r << 24);
267 }
268
269 void
setStrength(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)270 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
271 if(U_FAILURE(errorCode)) { return; }
272 int32_t noStrength = options & ~STRENGTH_MASK;
273 switch(value) {
274 case UCOL_PRIMARY:
275 case UCOL_SECONDARY:
276 case UCOL_TERTIARY:
277 case UCOL_QUATERNARY:
278 case UCOL_IDENTICAL:
279 options = noStrength | (value << STRENGTH_SHIFT);
280 break;
281 case UCOL_DEFAULT:
282 options = noStrength | (defaultOptions & STRENGTH_MASK);
283 break;
284 default:
285 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286 break;
287 }
288 }
289
290 void
setFlag(int32_t bit,UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)291 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
292 int32_t defaultOptions, UErrorCode &errorCode) {
293 if(U_FAILURE(errorCode)) { return; }
294 switch(value) {
295 case UCOL_ON:
296 options |= bit;
297 break;
298 case UCOL_OFF:
299 options &= ~bit;
300 break;
301 case UCOL_DEFAULT:
302 options = (options & ~bit) | (defaultOptions & bit);
303 break;
304 default:
305 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
306 break;
307 }
308 }
309
310 void
setCaseFirst(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)311 CollationSettings::setCaseFirst(UColAttributeValue value,
312 int32_t defaultOptions, UErrorCode &errorCode) {
313 if(U_FAILURE(errorCode)) { return; }
314 int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
315 switch(value) {
316 case UCOL_OFF:
317 options = noCaseFirst;
318 break;
319 case UCOL_LOWER_FIRST:
320 options = noCaseFirst | CASE_FIRST;
321 break;
322 case UCOL_UPPER_FIRST:
323 options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
324 break;
325 case UCOL_DEFAULT:
326 options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
327 break;
328 default:
329 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
330 break;
331 }
332 }
333
334 void
setAlternateHandling(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)335 CollationSettings::setAlternateHandling(UColAttributeValue value,
336 int32_t defaultOptions, UErrorCode &errorCode) {
337 if(U_FAILURE(errorCode)) { return; }
338 int32_t noAlternate = options & ~ALTERNATE_MASK;
339 switch(value) {
340 case UCOL_NON_IGNORABLE:
341 options = noAlternate;
342 break;
343 case UCOL_SHIFTED:
344 options = noAlternate | SHIFTED;
345 break;
346 case UCOL_DEFAULT:
347 options = noAlternate | (defaultOptions & ALTERNATE_MASK);
348 break;
349 default:
350 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
351 break;
352 }
353 }
354
355 void
setMaxVariable(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)356 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
357 if(U_FAILURE(errorCode)) { return; }
358 int32_t noMax = options & ~MAX_VARIABLE_MASK;
359 switch(value) {
360 case MAX_VAR_SPACE:
361 case MAX_VAR_PUNCT:
362 case MAX_VAR_SYMBOL:
363 case MAX_VAR_CURRENCY:
364 options = noMax | (value << MAX_VARIABLE_SHIFT);
365 break;
366 case UCOL_DEFAULT:
367 options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
368 break;
369 default:
370 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
371 break;
372 }
373 }
374
375 U_NAMESPACE_END
376
377 #endif // !UCONFIG_NO_COLLATION
378