1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: cstrcase.c
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002feb21
16 * created by: Markus W. Scherer
17 *
18 * Test file for string casing C API functions.
19 */
20
21 #include <string.h>
22 #include "unicode/utypes.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uloc.h"
26 #include "unicode/ubrk.h"
27 #include "unicode/ucasemap.h"
28 #include "cmemory.h"
29 #include "cintltst.h"
30 #include "ustr_imp.h"
31
32 /* test string case mapping functions --------------------------------------- */
33
34 static void
TestCaseLower(void)35 TestCaseLower(void) {
36 static const UChar
37
38 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
39 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
40 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff };
41
42 UChar buffer[32];
43 int32_t length;
44 UErrorCode errorCode;
45
46 /* lowercase with root locale and separate buffers */
47 buffer[0]=0xabcd;
48 errorCode=U_ZERO_ERROR;
49 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
50 beforeLower, UPRV_LENGTHOF(beforeLower),
51 "",
52 &errorCode);
53 if( U_FAILURE(errorCode) ||
54 length!=(UPRV_LENGTHOF(lowerRoot)) ||
55 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
56 buffer[length]!=0
57 ) {
58 log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n",
59 length,
60 u_errorName(errorCode),
61 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 &&
62 buffer[length]==0 ? "yes" : "no",
63 aescstrdup(lowerRoot,-1),
64 aescstrdup(buffer,-1));
65 }
66
67 /* lowercase with turkish locale and in the same buffer */
68 uprv_memcpy(buffer, beforeLower, sizeof(beforeLower));
69 buffer[UPRV_LENGTHOF(beforeLower)]=0;
70 errorCode=U_ZERO_ERROR;
71 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
72 buffer, -1, /* implicit srcLength */
73 "tr",
74 &errorCode);
75 if( U_FAILURE(errorCode) ||
76 length!=(UPRV_LENGTHOF(lowerTurkish)) ||
77 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
78 buffer[length]!=0
79 ) {
80 log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n",
81 length,
82 u_errorName(errorCode),
83 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
84 }
85
86 /* test preflighting */
87 buffer[0]=buffer[2]=0xabcd;
88 errorCode=U_ZERO_ERROR;
89 length=u_strToLower(buffer, 2, /* set destCapacity=2 */
90 beforeLower, UPRV_LENGTHOF(beforeLower),
91 "",
92 &errorCode);
93 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
94 length!=(UPRV_LENGTHOF(lowerRoot)) ||
95 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
96 buffer[2]!=0xabcd
97 ) {
98 log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n",
99 length,
100 u_errorName(errorCode),
101 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
102 }
103
104 /* test error handling */
105 errorCode=U_ZERO_ERROR;
106 length=u_strToLower(NULL, UPRV_LENGTHOF(buffer),
107 beforeLower, UPRV_LENGTHOF(beforeLower),
108 "",
109 &errorCode);
110 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
111 log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n",
112 length,
113 u_errorName(errorCode));
114 }
115
116 buffer[0]=0xabcd;
117 errorCode=U_ZERO_ERROR;
118 length=u_strToLower(buffer, -1,
119 beforeLower, UPRV_LENGTHOF(beforeLower),
120 "",
121 &errorCode);
122 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
123 buffer[0]!=0xabcd
124 ) {
125 log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
126 length,
127 u_errorName(errorCode),
128 buffer[0]);
129 }
130 }
131
132 static void
TestCaseUpper(void)133 TestCaseUpper(void) {
134 static const UChar
135
136 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
137 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
138 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff };
139
140 UChar buffer[32];
141 int32_t length;
142 UErrorCode errorCode;
143
144 /* uppercase with root locale and in the same buffer */
145 uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper));
146 errorCode=U_ZERO_ERROR;
147 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
148 buffer, UPRV_LENGTHOF(beforeUpper),
149 "",
150 &errorCode);
151 if( U_FAILURE(errorCode) ||
152 length!=(UPRV_LENGTHOF(upperRoot)) ||
153 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
154 buffer[length]!=0
155 ) {
156 log_err("error in u_strToUpper(root locale)=%ld error=%s string matches: %s\n",
157 length,
158 u_errorName(errorCode),
159 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
160 }
161
162 /* uppercase with turkish locale and separate buffers */
163 buffer[0]=0xabcd;
164 errorCode=U_ZERO_ERROR;
165 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
166 beforeUpper, UPRV_LENGTHOF(beforeUpper),
167 "tr",
168 &errorCode);
169 if( U_FAILURE(errorCode) ||
170 length!=(UPRV_LENGTHOF(upperTurkish)) ||
171 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
172 buffer[length]!=0
173 ) {
174 log_err("error in u_strToUpper(turkish locale)=%ld error=%s string matches: %s\n",
175 length,
176 u_errorName(errorCode),
177 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
178 }
179
180 /* test preflighting */
181 errorCode=U_ZERO_ERROR;
182 length=u_strToUpper(NULL, 0,
183 beforeUpper, UPRV_LENGTHOF(beforeUpper),
184 "tr",
185 &errorCode);
186 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
187 length!=(UPRV_LENGTHOF(upperTurkish))
188 ) {
189 log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld error=%s\n",
190 length,
191 u_errorName(errorCode));
192 }
193
194 /* test error handling */
195 buffer[0]=0xabcd;
196 errorCode=U_ZERO_ERROR;
197 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
198 NULL, UPRV_LENGTHOF(beforeUpper),
199 "tr",
200 &errorCode);
201 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
202 buffer[0]!=0xabcd
203 ) {
204 log_err("error in u_strToUpper(turkish locale src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
205 length,
206 u_errorName(errorCode),
207 buffer[0]);
208 }
209
210 buffer[0]=0xabcd;
211 errorCode=U_ZERO_ERROR;
212 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
213 beforeUpper, -2,
214 "tr",
215 &errorCode);
216 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
217 buffer[0]!=0xabcd
218 ) {
219 log_err("error in u_strToUpper(turkish locale srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
220 length,
221 u_errorName(errorCode),
222 buffer[0]);
223 }
224 }
225
226 #if !UCONFIG_NO_BREAK_ITERATION
227
228 static void
TestCaseTitle(void)229 TestCaseTitle(void) {
230 static const UChar
231
232 beforeTitle[]= { 0x61, 0x42, 0x20, 0x69, 0x3c2, 0x20, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
233 titleWord[]= { 0x41, 0x62, 0x20, 0x49, 0x3c2, 0x20, 0x53, 0x73, 0x3c3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff },
234 titleChar[]= { 0x41, 0x42, 0x20, 0x49, 0x3a3, 0x20, 0x53, 0x73, 0x3a3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff };
235
236 UChar buffer[32];
237 UBreakIterator *titleIterChars;
238 int32_t length;
239 UErrorCode errorCode;
240
241 errorCode=U_ZERO_ERROR;
242 titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
243 if(U_FAILURE(errorCode)) {
244 log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode));
245 return;
246 }
247
248 /* titlecase with standard break iterator and in the same buffer */
249 uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle));
250 errorCode=U_ZERO_ERROR;
251 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
252 buffer, UPRV_LENGTHOF(beforeTitle),
253 NULL, "",
254 &errorCode);
255 if( U_FAILURE(errorCode) ||
256 length!=(UPRV_LENGTHOF(titleWord)) ||
257 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 ||
258 buffer[length]!=0
259 ) {
260 log_err("error in u_strToTitle(standard iterator)=%ld error=%s string matches: %s\n",
261 length,
262 u_errorName(errorCode),
263 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
264 }
265
266 /* titlecase with UBRK_CHARACTERS and separate buffers */
267 buffer[0]=0xabcd;
268 errorCode=U_ZERO_ERROR;
269 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
270 beforeTitle, UPRV_LENGTHOF(beforeTitle),
271 titleIterChars, "",
272 &errorCode);
273 if( U_FAILURE(errorCode) ||
274 length!=(UPRV_LENGTHOF(titleChar)) ||
275 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 ||
276 buffer[length]!=0
277 ) {
278 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s string matches: %s\n",
279 length,
280 u_errorName(errorCode),
281 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
282 }
283
284 /* test preflighting */
285 errorCode=U_ZERO_ERROR;
286 length=u_strToTitle(NULL, 0,
287 beforeTitle, UPRV_LENGTHOF(beforeTitle),
288 titleIterChars, "",
289 &errorCode);
290 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
291 length!=(UPRV_LENGTHOF(titleChar))
292 ) {
293 log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld error=%s\n",
294 length,
295 u_errorName(errorCode));
296 }
297
298 /* test error handling */
299 buffer[0]=0xabcd;
300 errorCode=U_ZERO_ERROR;
301 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
302 NULL, UPRV_LENGTHOF(beforeTitle),
303 titleIterChars, "",
304 &errorCode);
305 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
306 buffer[0]!=0xabcd
307 ) {
308 log_err("error in u_strToTitle(UBRK_CHARACTERS src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
309 length,
310 u_errorName(errorCode),
311 buffer[0]);
312 }
313
314 buffer[0]=0xabcd;
315 errorCode=U_ZERO_ERROR;
316 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
317 beforeTitle, -2,
318 titleIterChars, "",
319 &errorCode);
320 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
321 buffer[0]!=0xabcd
322 ) {
323 log_err("error in u_strToTitle(UBRK_CHARACTERS srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
324 length,
325 u_errorName(errorCode),
326 buffer[0]);
327 }
328
329 ubrk_close(titleIterChars);
330 }
331
332 static void
TestCaseDutchTitle(void)333 TestCaseDutchTitle(void) {
334 static const UChar
335
336 beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C },
337 titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C },
338 titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C };
339
340 UChar buffer[32];
341 UBreakIterator *titleIterWord;
342 int32_t length;
343 UErrorCode errorCode;
344
345 errorCode=U_ZERO_ERROR;
346 titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
347 if(U_FAILURE(errorCode)) {
348 log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorName(errorCode));
349 return;
350 }
351
352 /* titlecase with default locale */
353 buffer[0]=0xabcd;
354 errorCode=U_ZERO_ERROR;
355 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
356 beforeTitle, UPRV_LENGTHOF(beforeTitle),
357 titleIterWord, "",
358 &errorCode);
359 if( U_FAILURE(errorCode) ||
360 length!=(UPRV_LENGTHOF(titleRoot)) ||
361 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
362 buffer[length]!=0
363 ) {
364 char charsOut[21];
365 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
366 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s root locale string matches: %s\noutput buffer is {%s}\n",
367 length,
368 u_errorName(errorCode),
369 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
370 }
371 /* titlecase with Dutch locale */
372 buffer[0]=0xabcd;
373 errorCode=U_ZERO_ERROR;
374 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
375 beforeTitle, UPRV_LENGTHOF(beforeTitle),
376 titleIterWord, "nl",
377 &errorCode);
378 if( U_FAILURE(errorCode) ||
379 length!=(UPRV_LENGTHOF(titleDutch)) ||
380 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 ||
381 buffer[length]!=0
382 ) {
383 char charsOut[21];
384 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
385 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s dutch locale string matches: %s\noutput buffer is {%s}\n",
386 length,
387 u_errorName(errorCode),
388 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
389 }
390
391 ubrk_close(titleIterWord);
392 }
393
394 #endif
395
396 /* test case folding and case-insensitive string compare -------------------- */
397
398 static void
TestCaseFolding(void)399 TestCaseFolding(void) {
400 /*
401 * CaseFolding.txt says about i and its cousins:
402 * 0049; C; 0069; # LATIN CAPITAL LETTER I
403 * 0049; T; 0131; # LATIN CAPITAL LETTER I
404 *
405 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
406 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
407 * That's all.
408 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
409 */
410 static const UChar32
411 simple[]={
412 /* input, default, exclude special i */
413 0x61, 0x61, 0x61,
414 0x49, 0x69, 0x131,
415 0x130, 0x130, 0x69,
416 0x131, 0x131, 0x131,
417 0xdf, 0xdf, 0xdf,
418 0xfb03, 0xfb03, 0xfb03,
419 0x1040e,0x10436,0x10436,
420 0x5ffff,0x5ffff,0x5ffff
421 };
422
423 static const UChar
424 mixed[]= { 0x61, 0x42, 0x130, 0x49, 0x131, 0x3d0, 0xdf, 0xfb03, 0xd93f, 0xdfff },
425 foldedDefault[]= { 0x61, 0x62, 0x69, 0x307, 0x69, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff },
426 foldedExcludeSpecialI[]={ 0x61, 0x62, 0x69, 0x131, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff };
427
428 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
429
430 const UChar32 *p;
431 int32_t i;
432
433 UChar buffer[32];
434 int32_t length;
435 UErrorCode errorCode;
436 UBool isUnicode_3_1;
437
438 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
439 u_getUnicodeVersion(unicodeVersion);
440 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
441
442 /* test simple case folding */
443 p=simple;
444 for(i=0; i<sizeof(simple)/12; p+=3, ++i) {
445 if(u_foldCase(p[0], U_FOLD_CASE_DEFAULT)!=p[1]) {
446 log_err("error: u_foldCase(0x%04lx, default)=0x%04lx instead of 0x%04lx\n",
447 p[0], u_foldCase(p[0], U_FOLD_CASE_DEFAULT), p[1]);
448 return;
449 }
450
451 if(isUnicode_3_1 && u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I)!=p[2]) {
452 log_err("error: u_foldCase(0x%04lx, exclude special i)=0x%04lx instead of 0x%04lx\n",
453 p[0], u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I), p[2]);
454 return;
455 }
456 }
457
458 /* test full string case folding with default option and separate buffers */
459 buffer[0]=0xabcd;
460 errorCode=U_ZERO_ERROR;
461 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
462 mixed, UPRV_LENGTHOF(mixed),
463 U_FOLD_CASE_DEFAULT,
464 &errorCode);
465 if( U_FAILURE(errorCode) ||
466 length!=(UPRV_LENGTHOF(foldedDefault)) ||
467 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
468 buffer[length]!=0
469 ) {
470 log_err("error in u_strFoldCase(default)=%ld error=%s string matches: %s\n",
471 length,
472 u_errorName(errorCode),
473 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
474 }
475
476 /* exclude special i */
477 if(isUnicode_3_1) {
478 buffer[0]=0xabcd;
479 errorCode=U_ZERO_ERROR;
480 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
481 mixed, UPRV_LENGTHOF(mixed),
482 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
483 &errorCode);
484 if( U_FAILURE(errorCode) ||
485 length!=(UPRV_LENGTHOF(foldedExcludeSpecialI)) ||
486 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
487 buffer[length]!=0
488 ) {
489 log_err("error in u_strFoldCase(exclude special i)=%ld error=%s string matches: %s\n",
490 length,
491 u_errorName(errorCode),
492 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
493 }
494 }
495
496 /* test full string case folding with default option and in the same buffer */
497 uprv_memcpy(buffer, mixed, sizeof(mixed));
498 buffer[UPRV_LENGTHOF(mixed)]=0;
499 errorCode=U_ZERO_ERROR;
500 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
501 buffer, -1, /* implicit srcLength */
502 U_FOLD_CASE_DEFAULT,
503 &errorCode);
504 if( U_FAILURE(errorCode) ||
505 length!=(UPRV_LENGTHOF(foldedDefault)) ||
506 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
507 buffer[length]!=0
508 ) {
509 log_err("error in u_strFoldCase(default same buffer)=%ld error=%s string matches: %s\n",
510 length,
511 u_errorName(errorCode),
512 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
513 }
514
515 /* test full string case folding, exclude special i, in the same buffer */
516 if(isUnicode_3_1) {
517 uprv_memcpy(buffer, mixed, sizeof(mixed));
518 errorCode=U_ZERO_ERROR;
519 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
520 buffer, UPRV_LENGTHOF(mixed),
521 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
522 &errorCode);
523 if( U_FAILURE(errorCode) ||
524 length!=UPRV_LENGTHOF(foldedExcludeSpecialI) ||
525 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
526 buffer[length]!=0
527 ) {
528 log_err("error in u_strFoldCase(exclude special i same buffer)=%ld error=%s string matches: %s\n",
529 length,
530 u_errorName(errorCode),
531 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
532 }
533 }
534
535 /* test preflighting */
536 buffer[0]=buffer[2]=0xabcd;
537 errorCode=U_ZERO_ERROR;
538 length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */
539 mixed, UPRV_LENGTHOF(mixed),
540 U_FOLD_CASE_DEFAULT,
541 &errorCode);
542 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
543 length!=UPRV_LENGTHOF(foldedDefault) ||
544 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
545 buffer[2]!=0xabcd
546 ) {
547 log_err("error in u_strFoldCase(default preflighting)=%ld error=%s string matches: %s\n",
548 length,
549 u_errorName(errorCode),
550 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
551 }
552
553 errorCode=U_ZERO_ERROR;
554 length=u_strFoldCase(NULL, 0,
555 mixed, UPRV_LENGTHOF(mixed),
556 U_FOLD_CASE_DEFAULT,
557 &errorCode);
558 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
559 length!=UPRV_LENGTHOF(foldedDefault)
560 ) {
561 log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\n",
562 length,
563 u_errorName(errorCode));
564 }
565
566 /* test error handling */
567 errorCode=U_ZERO_ERROR;
568 length=u_strFoldCase(NULL, UPRV_LENGTHOF(buffer),
569 mixed, UPRV_LENGTHOF(mixed),
570 U_FOLD_CASE_DEFAULT,
571 &errorCode);
572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
573 log_err("error in u_strFoldCase(default dest=NULL)=%ld error=%s\n",
574 length,
575 u_errorName(errorCode));
576 }
577
578 buffer[0]=0xabcd;
579 errorCode=U_ZERO_ERROR;
580 length=u_strFoldCase(buffer, -1,
581 mixed, UPRV_LENGTHOF(mixed),
582 U_FOLD_CASE_DEFAULT,
583 &errorCode);
584 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
585 buffer[0]!=0xabcd
586 ) {
587 log_err("error in u_strFoldCase(default destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
588 length,
589 u_errorName(errorCode),
590 buffer[0]);
591 }
592
593 buffer[0]=0xabcd;
594 errorCode=U_ZERO_ERROR;
595 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
596 NULL, UPRV_LENGTHOF(mixed),
597 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
598 &errorCode);
599 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
600 buffer[0]!=0xabcd
601 ) {
602 log_err("error in u_strFoldCase(exclude special i src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
603 length,
604 u_errorName(errorCode),
605 buffer[0]);
606 }
607
608 buffer[0]=0xabcd;
609 errorCode=U_ZERO_ERROR;
610 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
611 mixed, -2,
612 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
613 &errorCode);
614 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
615 buffer[0]!=0xabcd
616 ) {
617 log_err("error in u_strFoldCase(exclude special i srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
618 length,
619 u_errorName(errorCode),
620 buffer[0]);
621 }
622 }
623
624 static void
TestCaseCompare(void)625 TestCaseCompare(void) {
626 static const UChar
627
628 mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0xfb03, 0xd93f, 0xdfff, 0 },
629 otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
630 otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
631 different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
632
633 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
634
635 int32_t result, lenMixed, lenOtherDefault, lenOtherExcludeSpecialI, lenDifferent;
636 UErrorCode errorCode;
637 UBool isUnicode_3_1;
638
639 errorCode=U_ZERO_ERROR;
640
641 lenMixed=u_strlen(mixed);
642 lenOtherDefault=u_strlen(otherDefault);
643 (void)lenOtherDefault; /* Suppress set but not used warning. */
644 lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI);
645 lenDifferent=u_strlen(different);
646
647 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
648 u_getUnicodeVersion(unicodeVersion);
649 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
650 (void)isUnicode_3_1; /* Suppress set but not used warning. */
651
652 /* test u_strcasecmp() */
653 result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT);
654 if(result!=0) {
655 log_err("error: u_strcasecmp(mixed, other, default)=%ld instead of 0\n", result);
656 }
657 result=u_strCaseCompare(mixed, -1, otherDefault, -1, U_FOLD_CASE_DEFAULT, &errorCode);
658 if(result!=0) {
659 log_err("error: u_strCaseCompare(mixed, other, default)=%ld instead of 0\n", result);
660 }
661
662 /* test u_strcasecmp() - exclude special i */
663 result=u_strcasecmp(mixed, otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
664 if(result!=0) {
665 log_err("error: u_strcasecmp(mixed, other, exclude special i)=%ld instead of 0\n", result);
666 }
667 result=u_strCaseCompare(mixed, lenMixed, otherExcludeSpecialI, lenOtherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
668 if(result!=0) {
669 log_err("error: u_strCaseCompare(mixed, other, exclude special i)=%ld instead of 0\n", result);
670 }
671
672 /* test u_strcasecmp() */
673 result=u_strcasecmp(mixed, different, U_FOLD_CASE_DEFAULT);
674 if(result<=0) {
675 log_err("error: u_strcasecmp(mixed, different, default)=%ld instead of positive\n", result);
676 }
677 result=u_strCaseCompare(mixed, -1, different, lenDifferent, U_FOLD_CASE_DEFAULT, &errorCode);
678 if(result<=0) {
679 log_err("error: u_strCaseCompare(mixed, different, default)=%ld instead of positive\n", result);
680 }
681
682 /* test u_strncasecmp() - stop before the sharp s (U+00df) */
683 result=u_strncasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
684 if(result!=0) {
685 log_err("error: u_strncasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
686 }
687 result=u_strCaseCompare(mixed, 4, different, 4, U_FOLD_CASE_DEFAULT, &errorCode);
688 if(result!=0) {
689 log_err("error: u_strCaseCompare(mixed, 4, different, 4, default)=%ld instead of 0\n", result);
690 }
691
692 /* test u_strncasecmp() - stop in the middle of the sharp s (U+00df) */
693 result=u_strncasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
694 if(result<=0) {
695 log_err("error: u_strncasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
696 }
697 result=u_strCaseCompare(mixed, 5, different, 5, U_FOLD_CASE_DEFAULT, &errorCode);
698 if(result<=0) {
699 log_err("error: u_strCaseCompare(mixed, 5, different, 5, default)=%ld instead of positive\n", result);
700 }
701
702 /* test u_memcasecmp() - stop before the sharp s (U+00df) */
703 result=u_memcasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
704 if(result!=0) {
705 log_err("error: u_memcasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
706 }
707
708 /* test u_memcasecmp() - stop in the middle of the sharp s (U+00df) */
709 result=u_memcasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
710 if(result<=0) {
711 log_err("error: u_memcasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
712 }
713 }
714
715 /* test UCaseMap ------------------------------------------------------------ */
716
717 /*
718 * API test for UCaseMap;
719 * test cases for actual case mappings using UCaseMap see
720 * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing
721 */
722 static void
TestUCaseMap(void)723 TestUCaseMap(void) {
724 static const char
725 aBc[] ={ 0x61, 0x42, 0x63, 0 },
726 abc[] ={ 0x61, 0x62, 0x63, 0 },
727 ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 },
728 defg[]={ 0x64, 0x65, 0x66, 0x67, 0 };
729 char utf8Out[8];
730
731 UCaseMap *csm;
732 const char *locale;
733 uint32_t options;
734 int32_t length;
735 UErrorCode errorCode;
736
737 errorCode=U_ZERO_ERROR;
738 csm=ucasemap_open("tur", 0xa5, &errorCode);
739 if(U_FAILURE(errorCode)) {
740 log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode));
741 return;
742 }
743 locale=ucasemap_getLocale(csm);
744 if(0!=strcmp(locale, "tr")) {
745 log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
746 }
747 /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */
748 ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
749 locale=ucasemap_getLocale(csm);
750 if(0!=strcmp(locale, "i-klingon")) {
751 log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale);
752 }
753
754 errorCode=U_ZERO_ERROR;
755 options=ucasemap_getOptions(csm);
756 if(options!=0xa5) {
757 log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long)options);
758 }
759 ucasemap_setOptions(csm, 0x333333, &errorCode);
760 options=ucasemap_getOptions(csm);
761 if(options!=0x333333) {
762 log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x333333\n", (long)options);
763 }
764
765 /* test case mapping API; not all permutations necessary due to shared implementation code */
766
767 /* NUL terminated source */
768 errorCode=U_ZERO_ERROR;
769 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
770 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
771 log_err("ucasemap_utf8ToLower(aBc\\0) failed\n");
772 }
773
774 /* incoming failure code */
775 errorCode=U_PARSE_ERROR;
776 strcpy(utf8Out, defg);
777 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
778 if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) {
779 log_err("ucasemap_utf8ToLower(failure) failed\n");
780 }
781
782 /* overlapping input & output */
783 errorCode=U_ZERO_ERROR;
784 strcpy(utf8Out, aBc);
785 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode);
786 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
787 log_err("ucasemap_utf8ToUpper(overlap 1) failed\n");
788 }
789
790 /* overlap in the other direction */
791 errorCode=U_ZERO_ERROR;
792 strcpy(utf8Out, aBc);
793 length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode);
794 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
795 log_err("ucasemap_utf8ToUpper(overlap 2) failed\n");
796 }
797
798 /* NULL destination */
799 errorCode=U_ZERO_ERROR;
800 strcpy(utf8Out, defg);
801 length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
802 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
803 log_err("ucasemap_utf8ToLower(dest=NULL) failed\n");
804 }
805
806 /* destCapacity<0 */
807 errorCode=U_ZERO_ERROR;
808 strcpy(utf8Out, defg);
809 length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode);
810 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
811 log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n");
812 }
813
814 /* NULL source */
815 errorCode=U_ZERO_ERROR;
816 strcpy(utf8Out, defg);
817 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1, &errorCode);
818 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
819 log_err("ucasemap_utf8ToLower(src=NULL) failed\n");
820 }
821
822 /* srcLength<-1 */
823 errorCode=U_ZERO_ERROR;
824 strcpy(utf8Out, defg);
825 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2, &errorCode);
826 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
827 log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n");
828 }
829
830 /* buffer overflow */
831 errorCode=U_ZERO_ERROR;
832 strcpy(utf8Out, defg);
833 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode);
834 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8Out+2)) {
835 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
836 }
837
838 /* dest not terminated (leaves g from defg alone) */
839 errorCode=U_ZERO_ERROR;
840 strcpy(utf8Out, defg);
841 length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode);
842 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg, utf8Out)) {
843 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
844 }
845
846 /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
847 errorCode=U_ZERO_ERROR;
848 utf8Out[0]=0;
849 length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode);
850 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
851 log_err("ucasemap_utf8FoldCase(aBc) failed\n");
852 }
853
854 ucasemap_close(csm);
855 }
856
857 #if !UCONFIG_NO_BREAK_ITERATION
858
859 /* Try titlecasing with options. */
860 static void
TestUCaseMapToTitle(void)861 TestUCaseMapToTitle(void) {
862 /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
863 /*
864 * Note: The sentence BreakIterator does not recognize a '.'
865 * as a sentence terminator if it is followed by lowercase.
866 * That is why the example has the '!'.
867 */
868 static const UChar
869
870 beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
871 titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
872 titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
873 titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };
874
875 UChar buffer[32];
876 UCaseMap *csm;
877 UBreakIterator *sentenceIter;
878 const UBreakIterator *iter;
879 int32_t length;
880 UErrorCode errorCode;
881
882 errorCode=U_ZERO_ERROR;
883 csm=ucasemap_open("", 0, &errorCode);
884 if(U_FAILURE(errorCode)) {
885 log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
886 return;
887 }
888
889 iter=ucasemap_getBreakIterator(csm);
890 if(iter!=NULL) {
891 log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
892 }
893
894 /* Use default UBreakIterator: Word breaks. */
895 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
896 if( U_FAILURE(errorCode) ||
897 length!=UPRV_LENGTHOF(titleWord) ||
898 0!=u_memcmp(buffer, titleWord, length) ||
899 buffer[length]!=0
900 ) {
901 log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
902 }
903 if (U_SUCCESS(errorCode)) {
904 iter=ucasemap_getBreakIterator(csm);
905 if(iter==NULL) {
906 log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
907 }
908 }
909
910 /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
911 ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
912 if(U_FAILURE(errorCode)) {
913 log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
914 return;
915 }
916
917 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
918 if( U_FAILURE(errorCode) ||
919 length!=UPRV_LENGTHOF(titleWordNoAdjust) ||
920 0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
921 buffer[length]!=0
922 ) {
923 log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
924 }
925
926 /* Set a sentence break iterator. */
927 errorCode=U_ZERO_ERROR;
928 sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
929 if(U_FAILURE(errorCode)) {
930 log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
931 ucasemap_close(csm);
932 return;
933 }
934 ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
935 if(U_FAILURE(errorCode)) {
936 log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
937 ubrk_close(sentenceIter);
938 ucasemap_close(csm);
939 return;
940 }
941 iter=ucasemap_getBreakIterator(csm);
942 if(iter!=sentenceIter) {
943 log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
944 }
945
946 ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
947 if(U_FAILURE(errorCode)) {
948 log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
949 return;
950 }
951
952 /* Use the sentence break iterator with the option. Preflight first. */
953 length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
954 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
955 length!=UPRV_LENGTHOF(titleSentNoLower)
956 ) {
957 log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
958 }
959
960 errorCode=U_ZERO_ERROR;
961 buffer[0]=0;
962 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
963 if( U_FAILURE(errorCode) ||
964 length!=UPRV_LENGTHOF(titleSentNoLower) ||
965 0!=u_memcmp(buffer, titleSentNoLower, length) ||
966 buffer[length]!=0
967 ) {
968 log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
969 }
970
971 /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
972 {
973 char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
974 int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;
975
976 errorCode=U_ZERO_ERROR;
977 u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
978 u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode);
979
980 length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
981 if( U_FAILURE(errorCode) ||
982 length!=utf8TitleSentNoLowerLength ||
983 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
984 utf8[length]!=0
985 ) {
986 log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
987 }
988 }
989
990 ucasemap_close(csm);
991 }
992
993 #endif
994
995 /* Test case for internal API u_caseInsensitivePrefixMatch */
996 static void
TestUCaseInsensitivePrefixMatch(void)997 TestUCaseInsensitivePrefixMatch(void) {
998 struct {
999 const char *s1;
1000 const char *s2;
1001 int32_t r1;
1002 int32_t r2;
1003 } testCases[] = {
1004 {"ABC", "ab", 2, 2},
1005 {"ABCD", "abcx", 3, 3},
1006 {"ABC", "xyz", 0, 0},
1007 /* U+00DF LATIN SMALL LETTER SHARP S */
1008 {"A\\u00dfBC", "Ass", 2, 3},
1009 {"Fust", "Fu\\u00dfball", 2, 2},
1010 {"\\u00dfsA", "s\\u00dfB", 2, 2},
1011 {"\\u00dfs", "s\\u00df", 2, 2},
1012 /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */
1013 {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6},
1014 {0, 0, 0, 0}
1015 };
1016 int32_t i;
1017
1018 for (i = 0; testCases[i].s1 != 0; i++) {
1019 UErrorCode sts = U_ZERO_ERROR;
1020 UChar u1[64], u2[64];
1021 int32_t matchLen1, matchLen2;
1022
1023 u_unescape(testCases[i].s1, u1, 64);
1024 u_unescape(testCases[i].s2, u2, 64);
1025
1026 u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts);
1027 if (U_FAILURE(sts)) {
1028 log_err("error: %s, s1=%s, s2=%s", u_errorName(sts), testCases[i].s1, testCases[i].s2);
1029 } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) {
1030 log_err("s1=%s, s2=%2 / match len1=%d, len2=%d / expected len1=%d, len2=%d",
1031 testCases[i].s1, testCases[i].s2,
1032 matchLen1, matchLen2,
1033 testCases[i].r1, testCases[i].r2);
1034 }
1035 }
1036 }
1037
1038 void addCaseTest(TestNode** root);
1039
addCaseTest(TestNode ** root)1040 void addCaseTest(TestNode** root) {
1041 /* cstrcase.c functions, declared in cucdtst.h */
1042 addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower");
1043 addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper");
1044 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1045 addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle");
1046 addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle");
1047 #endif
1048 addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding");
1049 addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare");
1050 addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap");
1051 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1052 addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle");
1053 #endif
1054 addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch");
1055 }
1056