• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File line.h
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   07/07/2003  weiv        Creation.
17 *******************************************************************************
18 */
19 
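//
//   class StrengthProbe (implementation)
//
//      Determines the collation strength difference between strings by
//      comparing them, through a caller-supplied comparison function, after
//      attaching probe characters (B0..B3) and a separator (SE) to them.
//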
26 
27 #include "strengthprobe.h"
28 
StrengthProbe(CompareFn comparer,GetSortKeyFn getter,UChar SE,UChar B0,UChar B1,UChar B2,UChar B3)29 StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE,
30                              UChar B0, UChar B1, UChar B2, UChar B3) :
31 SE(SE),
32 B0(B0), B1(B1), B2(B2), B3(B3),
33 utilFirstP(&utilFirst), utilSecondP(&utilSecond),
34 frenchSecondary(FALSE),
35 comparer(comparer), skgetter(getter)
36 {
37 }
38 
39 int
setProbeChars(UChar B0,UChar B1,UChar B2,UChar B3)40 StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3)
41 {
42   this->B0 = B0;
43   this->B1 = B1;
44   this->B2 = B2;
45   this->
46 B3 = B3;
47   return checkSanity();
48 }
49 
50 int
checkSanity()51 StrengthProbe::checkSanity()
52 {
53   int sanityRes;
54   utilFirst.setTo(B0);
55   utilSecond.setTo(B3);
56   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
57     return sanityRes*10 + 3;
58   }
59   utilSecond.setTo(B2);
60   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
61     return sanityRes*10 + 2;
62   }
63   utilSecond.setTo(B1);
64   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
65     return sanityRes*10 + 1;
66   }
67   utilFirst.setTo(B3);
68   utilSecond.setTo(B2);
69   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
70     return sanityRes*10 + 5;
71   }
72   utilSecond.setTo(B1);
73   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
74     return sanityRes*10 + 4;
75   }
76   utilFirst.setTo(B2);
77   if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
78     return sanityRes*10 + 6;
79   }
80   utilFirst.setTo(B0);
81   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
82     return 1000;
83   }
84   utilFirst.setTo(B1);
85   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
86     return 1001;
87   }
88   utilFirst.setTo(B2);
89   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
90     return 1002;
91   }
92   utilFirst.setTo(B3);
93   if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
94     return 1003;
95   }
96   return 0;
97 }
98 
99 UBool
probePrefix(const Line & x,const Line & y,UChar first,UChar second)100 StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) {
101   utilFirst.name[0] = first;
102   utilFirst.name[1] = SE;
103   u_strcpy(utilFirst.name+2, x.name);
104   utilFirst.name[x.len+2] = 0;
105   utilFirst.len = x.len+2;
106 
107   utilSecond.name[0] = second;
108   utilSecond.name[1] = SE;
109   u_strcpy(utilSecond.name+2, y.name);
110   utilSecond.name[y.len+2] = 0;
111   utilSecond.len = y.len+2;
112 
113   if(comparer(&utilFirstP, &utilSecondP) < 0) {
114     return TRUE;
115   } else {
116     return FALSE;
117   }
118 }
119 
120 UBool
probeSuffix(const Line & x,const Line & y,UChar first,UChar second)121 StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) {
122   u_strcpy(utilFirst.name, x.name);
123   utilFirst.name[x.len] = SE;
124   utilFirst.name[x.len+1] = first;
125   utilFirst.name[x.len+2] = 0;
126   utilFirst.len = x.len + 2;
127   u_strcpy(utilSecond.name, y.name);
128   utilSecond.name[y.len] = SE;
129   utilSecond.name[y.len+1] = second;
130   utilSecond.name[y.len+2] = 0;
131   utilSecond.len = y.len + 2;
132 
133   if(comparer(&utilFirstP, &utilSecondP) < 0) {
134     return TRUE;
135   } else {
136     return FALSE;
137   }
138 }
139 
140 UBool
probePrefixNoSep(const Line & x,const Line & y,UChar first,UChar second)141 StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
142   utilFirst.name[0] = first;
143   u_strcpy(utilFirst.name+1, x.name);
144   utilFirst.name[x.len+1] = 0;
145   utilFirst.len = x.len + 1;
146 
147   utilSecond.name[0] = second;
148   u_strcpy(utilSecond.name+1, y.name);
149   utilSecond.name[y.len+1] = 0;
150   utilSecond.len = y.len + 1;
151 
152   if(comparer(&utilFirstP, &utilSecondP) < 0) {
153     return TRUE;
154   } else {
155     return FALSE;
156   }
157 }
158 
159 UBool
probeSuffixNoSep(const Line & x,const Line & y,UChar first,UChar second)160 StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
161   u_strcpy(utilFirst.name, x.name);
162   utilFirst.name[x.len] = first;
163   utilFirst.name[x.len+1] = 0;
164   utilFirst.len = x.len + 1;
165   u_strcpy(utilSecond.name, y.name);
166   utilSecond.name[y.len] = second;
167   utilSecond.name[y.len+1] = 0;
168   utilSecond.len = y.len + 1;
169 
170   if(comparer(&utilFirstP, &utilSecondP) < 0) {
171     return TRUE;
172   } else {
173     return FALSE;
174   }
175 }
176 
177 UColAttributeValue
getStrength(const Line & x,const Line & y)178 StrengthProbe::getStrength(const Line &x, const Line &y) {
179   const Line *xp = &x;
180   const Line *yp = &y;
181 
182   Line empty;
183   Line *emptyP = &empty;
184   if(comparer(&emptyP, &xp) == 0) {
185     return distanceFromEmptyString(y);
186   }
187 
188   int32_t result = comparer(&xp, &yp);
189 
190   if(result == 0) {
191     return UCOL_IDENTICAL;
192   } else if(result > 0) {
193     return UCOL_OFF; // bad situation
194   } else { // we need to probe strength
195     if(probeSuffix(x, y, B1, B0)) {
196     //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
197       return UCOL_PRIMARY;
198     } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
199       return UCOL_SECONDARY;
200     } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
201       return UCOL_TERTIARY;
202     } else if(!probePrefix(x, y, B3, B0)) {
203       return UCOL_QUATERNARY;
204     }
205     /*
206     //if(probeSuffix(x, y, B1, B0)) {
207     if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
208       return UCOL_PRIMARY;
209     } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
210       return UCOL_SECONDARY;
211     } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
212       return UCOL_TERTIARY;
213     } else if(!probePrefix(x, y, B3, B0)) {
214       return UCOL_QUATERNARY;
215     }
216     */
217   }
218   return UCOL_OFF; // bad
219 }
220 
221 UColAttributeValue
getStrength(const UnicodeString & sx,const UnicodeString & sy)222 StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) {
223   Line x(sx);
224   Line y(sy);
225   return getStrength(x, y);
226 }
227 
228 int32_t
compare(const UnicodeString & sx,const UnicodeString & sy)229 StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) {
230   Line x(sx);
231   Line y(sy);
232   const Line *xp = &x;
233   const Line *yp = &y;
234   return comparer(&xp, &yp);
235 }
236 
237 int32_t
compare(const Line & x,const Line & y)238 StrengthProbe::compare(const Line &x, const Line &y) {
239   const Line *xp = &x;
240   const Line *yp = &y;
241   return comparer(&xp, &yp);
242 }
243 
244 UColAttributeValue
distanceFromEmptyString(const Line & x)245 StrengthProbe::distanceFromEmptyString(const Line &x) {
246   if(x.name[0] == 0x30D) {
247     int32_t putBreakPointHere = 0;
248   }
249   Line empty;
250   Line *emptyP = &empty;
251   uint8_t buff[256];
252   getSortKey(empty.name, empty.len, buff, 256);
253   Line B0Line(B0);
254   Line *B0LineP = &B0Line;
255   const Line *xp = &x;
256   int32_t result = comparer(&emptyP, &xp);
257   if(result == 0) {
258     return UCOL_IDENTICAL;
259   } else if(result > 0) {
260     return UCOL_OFF;
261   }
262   result = comparer(&B0LineP, &xp);
263   if(result <= 0) {
264     return UCOL_PRIMARY;
265   }
266   Line sexb0(SE);
267   sexb0.append(x.name, x.len);
268   sexb0.append(B0);
269 
270   Line seb0(SE);
271   seb0.append(B0);
272   uint8_t seb0K[256];
273   uint8_t sexb0K[256];
274   uint8_t seb2K[256];
275   uint8_t seb3K[256];
276   memset(seb0K, 0, 256);
277   memset(sexb0K, 0, 256);
278   memset(seb2K, 0, 256);
279   memset(seb3K, 0, 256);
280 
281   getSortKey(seb0, seb0K, 256);
282   getSortKey(sexb0, sexb0K, 256);
283 
284   if(compare(seb0, sexb0) <= 0) {
285     Line seb2(SE);
286     seb2.append(B2);
287     getSortKey(seb2, seb2K, 256);
288     result = compare(seb2, sexb0);
289     if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference
290       return UCOL_SECONDARY;
291     }
292     Line seb3(SE);
293     seb3.append(B3);
294     getSortKey(seb3, seb3K, 256);
295     if(compare(seb3, sexb0) < 0) {
296       return UCOL_TERTIARY;
297     }
298     return UCOL_QUATERNARY;
299   } else {
300     // if this was UCA, we would have a primary difference.
301     // however, this might not be so, since not everybody
302     // makes well formed CEs.
303     // in cs_CZ on linux, space is tertiary ignorable, but
304     // its quaternary level strength is lower than quad
305     // strengths for non-ignorables. oh well, more testing
306     // required
307     // I think that we can only have quaternary difference
308     // here (in addition to primary difference).
309     //if(!probePrefix(x, empty, B3, B0)) {
310       //return UCOL_QUATERNARY;
311     //} else {
312       return UCOL_PRIMARY;
313     //}
314   }
315 }
316 
317 UColAttributeValue
distanceFromEmptyString(const UnicodeString & x)318 StrengthProbe::distanceFromEmptyString(const UnicodeString &x) {
319   const Line xp(x);
320   return distanceFromEmptyString(xp);
321 }
322 
323 
324 UColAttributeValue
getPrefixedStrength(const Line & prefix,const Line & x,const Line & y)325 StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) {
326   contractionUtilFirst.setToConcat(&prefix, &x);
327   contractionUtilSecond.setToConcat(&prefix, &y);
328   return getStrength(contractionUtilFirst, contractionUtilSecond);
329 }
330 
331 
// Copy constructor: all the copying is done by the assignment operator.
StrengthProbe::StrengthProbe(const StrengthProbe &that) {
  operator=(that);
}
335 
336 StrengthProbe &
operator =(const StrengthProbe & that)337 StrengthProbe::operator=(const StrengthProbe &that) {
338   if(this != &that) {
339     B0 = that.B0;
340     B1 = that.B1;
341     B2 = that.B2;
342     B3 = that.B3;
343     SE = that.SE;
344     frenchSecondary = that.frenchSecondary;
345     comparer = that.comparer;
346     skgetter = that.skgetter;
347 
348     utilFirstP = &utilFirst;
349     utilSecondP = &utilSecond;
350   }
351 
352   return *this;
353 }
354 
355 UBool
isFrenchSecondary(UErrorCode & status)356 StrengthProbe::isFrenchSecondary(UErrorCode &status) {
357   utilFirst.setTo(B0);
358   utilFirst.append(SE);
359   utilFirst.append(B2);
360   utilSecond.setTo(B2);
361   utilSecond.append(SE);
362   utilSecond.append(B0);
363 
364   int32_t result = compare(utilFirst, utilSecond);
365 
366   if(result < 0) {
367     return FALSE;
368   } else if(result > 0) {
369     frenchSecondary = TRUE;
370     return TRUE;
371   } else {
372     status = U_INTERNAL_PROGRAM_ERROR;
373     return FALSE;
374   }
375 }
376 
377 UBool
isUpperFirst(UErrorCode & status)378 StrengthProbe::isUpperFirst(UErrorCode &status) {
379   UChar i = 0;
380   int32_t result = 0;
381   int32_t upper = 0, lower = 0, equal = 0;
382   for(i = 0x41; i < 0x5B; i++) {
383     utilFirst.setTo(i);
384     utilSecond.setTo(i+0x20);
385     result = compare(utilFirst, utilSecond);
386     if(result < 0) {
387       upper++;
388     } else if(result > 0) {
389       lower++;
390     } else {
391       equal++;
392     }
393   }
394 
395   if(lower == 0 && equal == 0) {
396     return TRUE;
397   }
398   if(upper == 0 && equal == 0) {
399     return FALSE;
400   }
401   status = U_INTERNAL_PROGRAM_ERROR;
402   return FALSE;
403 }
404 
405