1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
* File strengthprobe.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 07/07/2003 weiv Creation.
17 *******************************************************************************
18 */
19
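//
// class StrengthProbe
//
// Member function definitions for StrengthProbe. The probe combines a
// caller-supplied comparison function with a separator (SE) and four probe
// characters (B0..B3) to estimate the strength at which two Lines differ.
//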
26
27 #include "strengthprobe.h"
28
StrengthProbe(CompareFn comparer,GetSortKeyFn getter,UChar SE,UChar B0,UChar B1,UChar B2,UChar B3)29 StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE,
30 UChar B0, UChar B1, UChar B2, UChar B3) :
31 SE(SE),
32 B0(B0), B1(B1), B2(B2), B3(B3),
33 utilFirstP(&utilFirst), utilSecondP(&utilSecond),
34 frenchSecondary(false),
35 comparer(comparer), skgetter(getter)
36 {
37 }
38
39 int
setProbeChars(UChar B0,UChar B1,UChar B2,UChar B3)40 StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3)
41 {
42 this->B0 = B0;
43 this->B1 = B1;
44 this->B2 = B2;
45 this->
46 B3 = B3;
47 return checkSanity();
48 }
49
50 int
checkSanity()51 StrengthProbe::checkSanity()
52 {
53 int sanityRes;
54 utilFirst.setTo(B0);
55 utilSecond.setTo(B3);
56 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
57 return sanityRes*10 + 3;
58 }
59 utilSecond.setTo(B2);
60 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
61 return sanityRes*10 + 2;
62 }
63 utilSecond.setTo(B1);
64 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
65 return sanityRes*10 + 1;
66 }
67 utilFirst.setTo(B3);
68 utilSecond.setTo(B2);
69 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
70 return sanityRes*10 + 5;
71 }
72 utilSecond.setTo(B1);
73 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
74 return sanityRes*10 + 4;
75 }
76 utilFirst.setTo(B2);
77 if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
78 return sanityRes*10 + 6;
79 }
80 utilFirst.setTo(B0);
81 if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
82 return 1000;
83 }
84 utilFirst.setTo(B1);
85 if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
86 return 1001;
87 }
88 utilFirst.setTo(B2);
89 if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
90 return 1002;
91 }
92 utilFirst.setTo(B3);
93 if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
94 return 1003;
95 }
96 return 0;
97 }
98
99 UBool
probePrefix(const Line & x,const Line & y,UChar first,UChar second)100 StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) {
101 utilFirst.name[0] = first;
102 utilFirst.name[1] = SE;
103 u_strcpy(utilFirst.name+2, x.name);
104 utilFirst.name[x.len+2] = 0;
105 utilFirst.len = x.len+2;
106
107 utilSecond.name[0] = second;
108 utilSecond.name[1] = SE;
109 u_strcpy(utilSecond.name+2, y.name);
110 utilSecond.name[y.len+2] = 0;
111 utilSecond.len = y.len+2;
112
113 if(comparer(&utilFirstP, &utilSecondP) < 0) {
114 return true;
115 } else {
116 return false;
117 }
118 }
119
120 UBool
probeSuffix(const Line & x,const Line & y,UChar first,UChar second)121 StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) {
122 u_strcpy(utilFirst.name, x.name);
123 utilFirst.name[x.len] = SE;
124 utilFirst.name[x.len+1] = first;
125 utilFirst.name[x.len+2] = 0;
126 utilFirst.len = x.len + 2;
127 u_strcpy(utilSecond.name, y.name);
128 utilSecond.name[y.len] = SE;
129 utilSecond.name[y.len+1] = second;
130 utilSecond.name[y.len+2] = 0;
131 utilSecond.len = y.len + 2;
132
133 if(comparer(&utilFirstP, &utilSecondP) < 0) {
134 return true;
135 } else {
136 return false;
137 }
138 }
139
140 UBool
probePrefixNoSep(const Line & x,const Line & y,UChar first,UChar second)141 StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
142 utilFirst.name[0] = first;
143 u_strcpy(utilFirst.name+1, x.name);
144 utilFirst.name[x.len+1] = 0;
145 utilFirst.len = x.len + 1;
146
147 utilSecond.name[0] = second;
148 u_strcpy(utilSecond.name+1, y.name);
149 utilSecond.name[y.len+1] = 0;
150 utilSecond.len = y.len + 1;
151
152 if(comparer(&utilFirstP, &utilSecondP) < 0) {
153 return true;
154 } else {
155 return false;
156 }
157 }
158
159 UBool
probeSuffixNoSep(const Line & x,const Line & y,UChar first,UChar second)160 StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
161 u_strcpy(utilFirst.name, x.name);
162 utilFirst.name[x.len] = first;
163 utilFirst.name[x.len+1] = 0;
164 utilFirst.len = x.len + 1;
165 u_strcpy(utilSecond.name, y.name);
166 utilSecond.name[y.len] = second;
167 utilSecond.name[y.len+1] = 0;
168 utilSecond.len = y.len + 1;
169
170 if(comparer(&utilFirstP, &utilSecondP) < 0) {
171 return true;
172 } else {
173 return false;
174 }
175 }
176
177 UColAttributeValue
getStrength(const Line & x,const Line & y)178 StrengthProbe::getStrength(const Line &x, const Line &y) {
179 const Line *xp = &x;
180 const Line *yp = &y;
181
182 Line empty;
183 Line *emptyP = ∅
184 if(comparer(&emptyP, &xp) == 0) {
185 return distanceFromEmptyString(y);
186 }
187
188 int32_t result = comparer(&xp, &yp);
189
190 if(result == 0) {
191 return UCOL_IDENTICAL;
192 } else if(result > 0) {
193 return UCOL_OFF; // bad situation
194 } else { // we need to probe strength
195 if(probeSuffix(x, y, B1, B0)) {
196 //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
197 return UCOL_PRIMARY;
198 } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
199 return UCOL_SECONDARY;
200 } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
201 return UCOL_TERTIARY;
202 } else if(!probePrefix(x, y, B3, B0)) {
203 return UCOL_QUATERNARY;
204 }
205 /*
206 //if(probeSuffix(x, y, B1, B0)) {
207 if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
208 return UCOL_PRIMARY;
209 } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
210 return UCOL_SECONDARY;
211 } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
212 return UCOL_TERTIARY;
213 } else if(!probePrefix(x, y, B3, B0)) {
214 return UCOL_QUATERNARY;
215 }
216 */
217 }
218 return UCOL_OFF; // bad
219 }
220
221 UColAttributeValue
getStrength(const UnicodeString & sx,const UnicodeString & sy)222 StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) {
223 Line x(sx);
224 Line y(sy);
225 return getStrength(x, y);
226 }
227
228 int32_t
compare(const UnicodeString & sx,const UnicodeString & sy)229 StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) {
230 Line x(sx);
231 Line y(sy);
232 const Line *xp = &x;
233 const Line *yp = &y;
234 return comparer(&xp, &yp);
235 }
236
237 int32_t
compare(const Line & x,const Line & y)238 StrengthProbe::compare(const Line &x, const Line &y) {
239 const Line *xp = &x;
240 const Line *yp = &y;
241 return comparer(&xp, &yp);
242 }
243
244 UColAttributeValue
distanceFromEmptyString(const Line & x)245 StrengthProbe::distanceFromEmptyString(const Line &x) {
246 if(x.name[0] == 0x30D) {
247 int32_t putBreakPointHere = 0;
248 }
249 Line empty;
250 Line *emptyP = ∅
251 uint8_t buff[256];
252 getSortKey(empty.name, empty.len, buff, 256);
253 Line B0Line(B0);
254 Line *B0LineP = &B0Line;
255 const Line *xp = &x;
256 int32_t result = comparer(&emptyP, &xp);
257 if(result == 0) {
258 return UCOL_IDENTICAL;
259 } else if(result > 0) {
260 return UCOL_OFF;
261 }
262 result = comparer(&B0LineP, &xp);
263 if(result <= 0) {
264 return UCOL_PRIMARY;
265 }
266 Line sexb0(SE);
267 sexb0.append(x.name, x.len);
268 sexb0.append(B0);
269
270 Line seb0(SE);
271 seb0.append(B0);
272 uint8_t seb0K[256];
273 uint8_t sexb0K[256];
274 uint8_t seb2K[256];
275 uint8_t seb3K[256];
276 memset(seb0K, 0, 256);
277 memset(sexb0K, 0, 256);
278 memset(seb2K, 0, 256);
279 memset(seb3K, 0, 256);
280
281 getSortKey(seb0, seb0K, 256);
282 getSortKey(sexb0, sexb0K, 256);
283
284 if(compare(seb0, sexb0) <= 0) {
285 Line seb2(SE);
286 seb2.append(B2);
287 getSortKey(seb2, seb2K, 256);
288 result = compare(seb2, sexb0);
289 if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference
290 return UCOL_SECONDARY;
291 }
292 Line seb3(SE);
293 seb3.append(B3);
294 getSortKey(seb3, seb3K, 256);
295 if(compare(seb3, sexb0) < 0) {
296 return UCOL_TERTIARY;
297 }
298 return UCOL_QUATERNARY;
299 } else {
300 // if this was UCA, we would have a primary difference.
301 // however, this might not be so, since not everybody
302 // makes well formed CEs.
303 // in cs_CZ on linux, space is tertiary ignorable, but
304 // its quaternary level strength is lower than quad
305 // strengths for non-ignorables. oh well, more testing
306 // required
307 // I think that we can only have quaternary difference
308 // here (in addition to primary difference).
309 //if(!probePrefix(x, empty, B3, B0)) {
310 //return UCOL_QUATERNARY;
311 //} else {
312 return UCOL_PRIMARY;
313 //}
314 }
315 }
316
317 UColAttributeValue
distanceFromEmptyString(const UnicodeString & x)318 StrengthProbe::distanceFromEmptyString(const UnicodeString &x) {
319 const Line xp(x);
320 return distanceFromEmptyString(xp);
321 }
322
323
324 UColAttributeValue
getPrefixedStrength(const Line & prefix,const Line & x,const Line & y)325 StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) {
326 contractionUtilFirst.setToConcat(&prefix, &x);
327 contractionUtilSecond.setToConcat(&prefix, &y);
328 return getStrength(contractionUtilFirst, contractionUtilSecond);
329 }
330
331
/**
 * Copy constructor. All the work is delegated to the assignment operator,
 * which copies the configuration and re-targets the scratch pointers at
 * this object's own scratch Lines.
 */
StrengthProbe::StrengthProbe(const StrengthProbe &that) {
    operator=(that);
}
335
336 StrengthProbe &
operator =(const StrengthProbe & that)337 StrengthProbe::operator=(const StrengthProbe &that) {
338 if(this != &that) {
339 B0 = that.B0;
340 B1 = that.B1;
341 B2 = that.B2;
342 B3 = that.B3;
343 SE = that.SE;
344 frenchSecondary = that.frenchSecondary;
345 comparer = that.comparer;
346 skgetter = that.skgetter;
347
348 utilFirstP = &utilFirst;
349 utilSecondP = &utilSecond;
350 }
351
352 return *this;
353 }
354
355 UBool
isFrenchSecondary(UErrorCode & status)356 StrengthProbe::isFrenchSecondary(UErrorCode &status) {
357 utilFirst.setTo(B0);
358 utilFirst.append(SE);
359 utilFirst.append(B2);
360 utilSecond.setTo(B2);
361 utilSecond.append(SE);
362 utilSecond.append(B0);
363
364 int32_t result = compare(utilFirst, utilSecond);
365
366 if(result < 0) {
367 return false;
368 } else if(result > 0) {
369 frenchSecondary = true;
370 return true;
371 } else {
372 status = U_INTERNAL_PROGRAM_ERROR;
373 return false;
374 }
375 }
376
377 UBool
isUpperFirst(UErrorCode & status)378 StrengthProbe::isUpperFirst(UErrorCode &status) {
379 UChar i = 0;
380 int32_t result = 0;
381 int32_t upper = 0, lower = 0, equal = 0;
382 for(i = 0x41; i < 0x5B; i++) {
383 utilFirst.setTo(i);
384 utilSecond.setTo(i+0x20);
385 result = compare(utilFirst, utilSecond);
386 if(result < 0) {
387 upper++;
388 } else if(result > 0) {
389 lower++;
390 } else {
391 equal++;
392 }
393 }
394
395 if(lower == 0 && equal == 0) {
396 return true;
397 }
398 if(upper == 0 && equal == 0) {
399 return false;
400 }
401 status = U_INTERNAL_PROGRAM_ERROR;
402 return false;
403 }
404
405