1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationrootelements.cpp
7 *
8 * created on: 2013mar05
9 * created by: Markus W. Scherer
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION
15
16 #include "collation.h"
17 #include "collationrootelements.h"
18 #include "uassert.h"
19
20 U_NAMESPACE_BEGIN
21
22 int64_t
lastCEWithPrimaryBefore(uint32_t p) const23 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
24 if(p == 0) { return 0; }
25 U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
26 int32_t index = findP(p);
27 uint32_t q = elements[index];
28 uint32_t secTer;
29 if(p == (q & 0xffffff00)) {
30 // p == elements[index] is a root primary. Find the CE before it.
31 // We must not be in a primary range.
32 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
33 secTer = elements[index - 1];
34 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
35 // Primary CE just before p.
36 p = secTer & 0xffffff00;
37 secTer = Collation::COMMON_SEC_AND_TER_CE;
38 } else {
39 // secTer = last secondary & tertiary for the previous primary
40 index -= 2;
41 for(;;) {
42 p = elements[index];
43 if((p & SEC_TER_DELTA_FLAG) == 0) {
44 p &= 0xffffff00;
45 break;
46 }
47 --index;
48 }
49 }
50 } else {
51 // p > elements[index] which is the previous primary.
52 // Find the last secondary & tertiary weights for it.
53 p = q & 0xffffff00;
54 secTer = Collation::COMMON_SEC_AND_TER_CE;
55 for(;;) {
56 q = elements[++index];
57 if((q & SEC_TER_DELTA_FLAG) == 0) {
58 // We must not be in a primary range.
59 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
60 break;
61 }
62 secTer = q;
63 }
64 }
65 return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
66 }
67
68 int64_t
firstCEWithPrimaryAtLeast(uint32_t p) const69 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
70 if(p == 0) { return 0; }
71 int32_t index = findP(p);
72 if(p != (elements[index] & 0xffffff00)) {
73 for(;;) {
74 p = elements[++index];
75 if((p & SEC_TER_DELTA_FLAG) == 0) {
76 // First primary after p. We must not be in a primary range.
77 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
78 break;
79 }
80 }
81 }
82 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
83 return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
84 }
85
86 uint32_t
getPrimaryBefore(uint32_t p,UBool isCompressible) const87 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
88 int32_t index = findPrimary(p);
89 int32_t step;
90 uint32_t q = elements[index];
91 if(p == (q & 0xffffff00)) {
92 // Found p itself. Return the previous primary.
93 // See if p is at the end of a previous range.
94 step = (int32_t)q & PRIMARY_STEP_MASK;
95 if(step == 0) {
96 // p is not at the end of a range. Look for the previous primary.
97 do {
98 p = elements[--index];
99 } while((p & SEC_TER_DELTA_FLAG) != 0);
100 return p & 0xffffff00;
101 }
102 } else {
103 // p is in a range, and not at the start.
104 uint32_t nextElement = elements[index + 1];
105 U_ASSERT(isEndOfPrimaryRange(nextElement));
106 step = (int32_t)nextElement & PRIMARY_STEP_MASK;
107 }
108 // Return the previous range primary.
109 if((p & 0xffff) == 0) {
110 return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
111 } else {
112 return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
113 }
114 }
115
116 uint32_t
getSecondaryBefore(uint32_t p,uint32_t s) const117 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
118 int32_t index;
119 uint32_t previousSec, sec;
120 if(p == 0) {
121 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
122 // Gap at the beginning of the secondary CE range.
123 previousSec = 0;
124 sec = elements[index] >> 16;
125 } else {
126 index = findPrimary(p) + 1;
127 previousSec = Collation::BEFORE_WEIGHT16;
128 sec = getFirstSecTerForPrimary(index) >> 16;
129 }
130 U_ASSERT(s >= sec);
131 while(s > sec) {
132 previousSec = sec;
133 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
134 sec = elements[index++] >> 16;
135 }
136 U_ASSERT(sec == s);
137 return previousSec;
138 }
139
140 uint32_t
getTertiaryBefore(uint32_t p,uint32_t s,uint32_t t) const141 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
142 U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
143 int32_t index;
144 uint32_t previousTer, secTer;
145 if(p == 0) {
146 if(s == 0) {
147 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
148 // Gap at the beginning of the tertiary CE range.
149 previousTer = 0;
150 } else {
151 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
152 previousTer = Collation::BEFORE_WEIGHT16;
153 }
154 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
155 } else {
156 index = findPrimary(p) + 1;
157 previousTer = Collation::BEFORE_WEIGHT16;
158 secTer = getFirstSecTerForPrimary(index);
159 }
160 uint32_t st = (s << 16) | t;
161 while(st > secTer) {
162 if((secTer >> 16) == s) { previousTer = secTer; }
163 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
164 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
165 }
166 U_ASSERT(secTer == st);
167 return previousTer & 0xffff;
168 }
169
170 uint32_t
getPrimaryAfter(uint32_t p,int32_t index,UBool isCompressible) const171 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
172 U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
173 uint32_t q = elements[++index];
174 int32_t step;
175 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
176 // Return the next primary in this range.
177 if((p & 0xffff) == 0) {
178 return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
179 } else {
180 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
181 }
182 } else {
183 // Return the next primary in the list.
184 while((q & SEC_TER_DELTA_FLAG) != 0) {
185 q = elements[++index];
186 }
187 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
188 return q;
189 }
190 }
191
192 uint32_t
getSecondaryAfter(int32_t index,uint32_t s) const193 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
194 uint32_t secTer;
195 uint32_t secLimit;
196 if(index == 0) {
197 // primary = 0
198 U_ASSERT(s != 0);
199 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
200 secTer = elements[index];
201 // Gap at the end of the secondary CE range.
202 secLimit = 0x10000;
203 } else {
204 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
205 secTer = getFirstSecTerForPrimary(index + 1);
206 // If this is an explicit sec/ter unit, then it will be read once more.
207 // Gap for secondaries of primary CEs.
208 secLimit = getSecondaryBoundary();
209 }
210 for(;;) {
211 uint32_t sec = secTer >> 16;
212 if(sec > s) { return sec; }
213 secTer = elements[++index];
214 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
215 }
216 }
217
218 uint32_t
getTertiaryAfter(int32_t index,uint32_t s,uint32_t t) const219 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
220 uint32_t secTer;
221 uint32_t terLimit;
222 if(index == 0) {
223 // primary = 0
224 if(s == 0) {
225 U_ASSERT(t != 0);
226 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
227 // Gap at the end of the tertiary CE range.
228 terLimit = 0x4000;
229 } else {
230 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
231 // Gap for tertiaries of primary/secondary CEs.
232 terLimit = getTertiaryBoundary();
233 }
234 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
235 } else {
236 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
237 secTer = getFirstSecTerForPrimary(index + 1);
238 // If this is an explicit sec/ter unit, then it will be read once more.
239 terLimit = getTertiaryBoundary();
240 }
241 uint32_t st = (s << 16) | t;
242 for(;;) {
243 if(secTer > st) {
244 U_ASSERT((secTer >> 16) == s);
245 return secTer & 0xffff;
246 }
247 secTer = elements[++index];
248 // No tertiary greater than t for this primary+secondary.
249 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
250 secTer &= ~SEC_TER_DELTA_FLAG;
251 }
252 }
253
254 uint32_t
getFirstSecTerForPrimary(int32_t index) const255 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
256 uint32_t secTer = elements[index];
257 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
258 // No sec/ter delta.
259 return Collation::COMMON_SEC_AND_TER_CE;
260 }
261 secTer &= ~SEC_TER_DELTA_FLAG;
262 if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
263 // Implied sec/ter.
264 return Collation::COMMON_SEC_AND_TER_CE;
265 }
266 // Explicit sec/ter below common/common.
267 return secTer;
268 }
269
270 int32_t
findPrimary(uint32_t p) const271 CollationRootElements::findPrimary(uint32_t p) const {
272 // Requirement: p must occur as a root primary.
273 U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary
274 int32_t index = findP(p);
275 // If p is in a range, then we just assume that p is an actual primary in this range.
276 // (Too cumbersome/expensive to check.)
277 // Otherwise, it must be an exact match.
278 U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
279 return index;
280 }
281
282 int32_t
findP(uint32_t p) const283 CollationRootElements::findP(uint32_t p) const {
284 // p need not occur as a root primary.
285 // For example, it might be a reordering group boundary.
286 U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
287 // modified binary search
288 int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
289 U_ASSERT(p >= elements[start]);
290 int32_t limit = length - 1;
291 U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
292 U_ASSERT(p < elements[limit]);
293 while((start + 1) < limit) {
294 // Invariant: elements[start] and elements[limit] are primaries,
295 // and elements[start]<=p<=elements[limit].
296 int32_t i = (start + limit) / 2;
297 uint32_t q = elements[i];
298 if((q & SEC_TER_DELTA_FLAG) != 0) {
299 // Find the next primary.
300 int32_t j = i + 1;
301 for(;;) {
302 if(j == limit) { break; }
303 q = elements[j];
304 if((q & SEC_TER_DELTA_FLAG) == 0) {
305 i = j;
306 break;
307 }
308 ++j;
309 }
310 if((q & SEC_TER_DELTA_FLAG) != 0) {
311 // Find the preceding primary.
312 j = i - 1;
313 for(;;) {
314 if(j == start) { break; }
315 q = elements[j];
316 if((q & SEC_TER_DELTA_FLAG) == 0) {
317 i = j;
318 break;
319 }
320 --j;
321 }
322 if((q & SEC_TER_DELTA_FLAG) != 0) {
323 // No primary between start and limit.
324 break;
325 }
326 }
327 }
328 if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary.
329 limit = i;
330 } else {
331 start = i;
332 }
333 }
334 return start;
335 }
336
337 U_NAMESPACE_END
338
339 #endif // !UCONFIG_NO_COLLATION
340