1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: cstrcase.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002feb21
16 * created by: Markus W. Scherer
17 *
18 * Test file for string casing C API functions.
19 */
20
21 #include <string.h>
22 #include "unicode/utypes.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uloc.h"
26 #include "unicode/ubrk.h"
27 #include "unicode/ucasemap.h"
28 #include "cmemory.h"
29 #include "cintltst.h"
30 #include "ucasemap_imp.h"
31 #include "ustr_imp.h"
32
33 /* test string case mapping functions --------------------------------------- */
34
35 static void
TestCaseLower(void)36 TestCaseLower(void) {
37 static const UChar
38
39 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
40 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
41 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff };
42
43 UChar buffer[32];
44 int32_t length;
45 UErrorCode errorCode;
46
47 /* lowercase with root locale and separate buffers */
48 buffer[0]=0xabcd;
49 errorCode=U_ZERO_ERROR;
50 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
51 beforeLower, UPRV_LENGTHOF(beforeLower),
52 "",
53 &errorCode);
54 if( U_FAILURE(errorCode) ||
55 length!=(UPRV_LENGTHOF(lowerRoot)) ||
56 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
57 buffer[length]!=0
58 ) {
59 log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n",
60 length,
61 u_errorName(errorCode),
62 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 &&
63 buffer[length]==0 ? "yes" : "no",
64 aescstrdup(lowerRoot,-1),
65 aescstrdup(buffer,-1));
66 }
67
68 /* lowercase with turkish locale and in the same buffer */
69 uprv_memcpy(buffer, beforeLower, sizeof(beforeLower));
70 buffer[UPRV_LENGTHOF(beforeLower)]=0;
71 errorCode=U_ZERO_ERROR;
72 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
73 buffer, -1, /* implicit srcLength */
74 "tr",
75 &errorCode);
76 if( U_FAILURE(errorCode) ||
77 length!=(UPRV_LENGTHOF(lowerTurkish)) ||
78 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
79 buffer[length]!=0
80 ) {
81 log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n",
82 length,
83 u_errorName(errorCode),
84 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
85 }
86
87 /* test preflighting */
88 buffer[0]=buffer[2]=0xabcd;
89 errorCode=U_ZERO_ERROR;
90 length=u_strToLower(buffer, 2, /* set destCapacity=2 */
91 beforeLower, UPRV_LENGTHOF(beforeLower),
92 "",
93 &errorCode);
94 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
95 length!=(UPRV_LENGTHOF(lowerRoot)) ||
96 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
97 buffer[2]!=0xabcd
98 ) {
99 log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n",
100 length,
101 u_errorName(errorCode),
102 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
103 }
104
105 /* test error handling */
106 errorCode=U_ZERO_ERROR;
107 length=u_strToLower(NULL, UPRV_LENGTHOF(buffer),
108 beforeLower, UPRV_LENGTHOF(beforeLower),
109 "",
110 &errorCode);
111 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
112 log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n",
113 length,
114 u_errorName(errorCode));
115 }
116
117 buffer[0]=0xabcd;
118 errorCode=U_ZERO_ERROR;
119 length=u_strToLower(buffer, -1,
120 beforeLower, UPRV_LENGTHOF(beforeLower),
121 "",
122 &errorCode);
123 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
124 buffer[0]!=0xabcd
125 ) {
126 log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
127 length,
128 u_errorName(errorCode),
129 buffer[0]);
130 }
131 }
132
133 static void
TestCaseUpper(void)134 TestCaseUpper(void) {
135 static const UChar
136
137 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
138 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
139 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff };
140
141 UChar buffer[32];
142 int32_t length;
143 UErrorCode errorCode;
144
145 /* uppercase with root locale and in the same buffer */
146 uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper));
147 errorCode=U_ZERO_ERROR;
148 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
149 buffer, UPRV_LENGTHOF(beforeUpper),
150 "",
151 &errorCode);
152 if( U_FAILURE(errorCode) ||
153 length!=(UPRV_LENGTHOF(upperRoot)) ||
154 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
155 buffer[length]!=0
156 ) {
157 log_err("error in u_strToUpper(root locale)=%ld error=%s string matches: %s\n",
158 length,
159 u_errorName(errorCode),
160 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
161 }
162
163 /* uppercase with turkish locale and separate buffers */
164 buffer[0]=0xabcd;
165 errorCode=U_ZERO_ERROR;
166 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
167 beforeUpper, UPRV_LENGTHOF(beforeUpper),
168 "tr",
169 &errorCode);
170 if( U_FAILURE(errorCode) ||
171 length!=(UPRV_LENGTHOF(upperTurkish)) ||
172 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
173 buffer[length]!=0
174 ) {
175 log_err("error in u_strToUpper(turkish locale)=%ld error=%s string matches: %s\n",
176 length,
177 u_errorName(errorCode),
178 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
179 }
180
181 /* test preflighting */
182 errorCode=U_ZERO_ERROR;
183 length=u_strToUpper(NULL, 0,
184 beforeUpper, UPRV_LENGTHOF(beforeUpper),
185 "tr",
186 &errorCode);
187 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
188 length!=(UPRV_LENGTHOF(upperTurkish))
189 ) {
190 log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld error=%s\n",
191 length,
192 u_errorName(errorCode));
193 }
194
195 /* test error handling */
196 buffer[0]=0xabcd;
197 errorCode=U_ZERO_ERROR;
198 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
199 NULL, UPRV_LENGTHOF(beforeUpper),
200 "tr",
201 &errorCode);
202 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
203 buffer[0]!=0xabcd
204 ) {
205 log_err("error in u_strToUpper(turkish locale src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
206 length,
207 u_errorName(errorCode),
208 buffer[0]);
209 }
210
211 buffer[0]=0xabcd;
212 errorCode=U_ZERO_ERROR;
213 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
214 beforeUpper, -2,
215 "tr",
216 &errorCode);
217 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
218 buffer[0]!=0xabcd
219 ) {
220 log_err("error in u_strToUpper(turkish locale srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
221 length,
222 u_errorName(errorCode),
223 buffer[0]);
224 }
225 }
226
227 #if !UCONFIG_NO_BREAK_ITERATION
228
229 static void
TestCaseTitle(void)230 TestCaseTitle(void) {
231 static const UChar
232
233 beforeTitle[]= { 0x61, 0x42, 0x20, 0x69, 0x3c2, 0x20, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
234 titleWord[]= { 0x41, 0x62, 0x20, 0x49, 0x3c2, 0x20, 0x53, 0x73, 0x3c3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff },
235 titleChar[]= { 0x41, 0x42, 0x20, 0x49, 0x3a3, 0x20, 0x53, 0x73, 0x3a3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff };
236
237 UChar buffer[32];
238 UBreakIterator *titleIterChars;
239 int32_t length;
240 UErrorCode errorCode;
241
242 errorCode=U_ZERO_ERROR;
243 titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
244 if(U_FAILURE(errorCode)) {
245 log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode));
246 return;
247 }
248
249 /* titlecase with standard break iterator and in the same buffer */
250 uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle));
251 errorCode=U_ZERO_ERROR;
252 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
253 buffer, UPRV_LENGTHOF(beforeTitle),
254 NULL, "",
255 &errorCode);
256 if( U_FAILURE(errorCode) ||
257 length!=(UPRV_LENGTHOF(titleWord)) ||
258 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 ||
259 buffer[length]!=0
260 ) {
261 log_err("error in u_strToTitle(standard iterator)=%ld error=%s string matches: %s\n",
262 length,
263 u_errorName(errorCode),
264 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
265 }
266
267 /* titlecase with UBRK_CHARACTERS and separate buffers */
268 buffer[0]=0xabcd;
269 errorCode=U_ZERO_ERROR;
270 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
271 beforeTitle, UPRV_LENGTHOF(beforeTitle),
272 titleIterChars, "",
273 &errorCode);
274 if( U_FAILURE(errorCode) ||
275 length!=(UPRV_LENGTHOF(titleChar)) ||
276 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 ||
277 buffer[length]!=0
278 ) {
279 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s string matches: %s\n",
280 length,
281 u_errorName(errorCode),
282 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
283 }
284
285 /* test preflighting */
286 errorCode=U_ZERO_ERROR;
287 length=u_strToTitle(NULL, 0,
288 beforeTitle, UPRV_LENGTHOF(beforeTitle),
289 titleIterChars, "",
290 &errorCode);
291 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
292 length!=(UPRV_LENGTHOF(titleChar))
293 ) {
294 log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld error=%s\n",
295 length,
296 u_errorName(errorCode));
297 }
298
299 /* test error handling */
300 buffer[0]=0xabcd;
301 errorCode=U_ZERO_ERROR;
302 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
303 NULL, UPRV_LENGTHOF(beforeTitle),
304 titleIterChars, "",
305 &errorCode);
306 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
307 buffer[0]!=0xabcd
308 ) {
309 log_err("error in u_strToTitle(UBRK_CHARACTERS src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
310 length,
311 u_errorName(errorCode),
312 buffer[0]);
313 }
314
315 buffer[0]=0xabcd;
316 errorCode=U_ZERO_ERROR;
317 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
318 beforeTitle, -2,
319 titleIterChars, "",
320 &errorCode);
321 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
322 buffer[0]!=0xabcd
323 ) {
324 log_err("error in u_strToTitle(UBRK_CHARACTERS srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
325 length,
326 u_errorName(errorCode),
327 buffer[0]);
328 }
329
330 ubrk_close(titleIterChars);
331 }
332
333 static void
TestCaseDutchTitle(void)334 TestCaseDutchTitle(void) {
335 static const UChar
336
337 beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C },
338 titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C },
339 titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C };
340
341 UChar buffer[32];
342 UBreakIterator *titleIterWord;
343 int32_t length;
344 UErrorCode errorCode;
345
346 errorCode=U_ZERO_ERROR;
347 titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
348 if(U_FAILURE(errorCode)) {
349 log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorName(errorCode));
350 return;
351 }
352
353 /* titlecase with default locale */
354 buffer[0]=0xabcd;
355 errorCode=U_ZERO_ERROR;
356 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
357 beforeTitle, UPRV_LENGTHOF(beforeTitle),
358 titleIterWord, "",
359 &errorCode);
360 if( U_FAILURE(errorCode) ||
361 length!=(UPRV_LENGTHOF(titleRoot)) ||
362 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
363 buffer[length]!=0
364 ) {
365 char charsOut[21];
366 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
367 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s root locale string matches: %s\noutput buffer is {%s}\n",
368 length,
369 u_errorName(errorCode),
370 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
371 }
372 /* titlecase with Dutch locale */
373 buffer[0]=0xabcd;
374 errorCode=U_ZERO_ERROR;
375 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
376 beforeTitle, UPRV_LENGTHOF(beforeTitle),
377 titleIterWord, "nl",
378 &errorCode);
379 if( U_FAILURE(errorCode) ||
380 length!=(UPRV_LENGTHOF(titleDutch)) ||
381 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 ||
382 buffer[length]!=0
383 ) {
384 char charsOut[21];
385 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
386 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s dutch locale string matches: %s\noutput buffer is {%s}\n",
387 length,
388 u_errorName(errorCode),
389 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
390 }
391
392 ubrk_close(titleIterWord);
393 }
394
395 #endif
396
397 /* test case folding and case-insensitive string compare -------------------- */
398
399 static void
TestCaseFolding(void)400 TestCaseFolding(void) {
401 /*
402 * CaseFolding.txt says about i and its cousins:
403 * 0049; C; 0069; # LATIN CAPITAL LETTER I
404 * 0049; T; 0131; # LATIN CAPITAL LETTER I
405 *
406 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
407 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
408 * That's all.
409 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
410 */
411 static const UChar32
412 simple[]={
413 /* input, default, exclude special i */
414 0x61, 0x61, 0x61,
415 0x49, 0x69, 0x131,
416 0x130, 0x130, 0x69,
417 0x131, 0x131, 0x131,
418 0xdf, 0xdf, 0xdf,
419 0xfb03, 0xfb03, 0xfb03,
420 0x1040e,0x10436,0x10436,
421 0x5ffff,0x5ffff,0x5ffff
422 };
423
424 static const UChar
425 mixed[]= { 0x61, 0x42, 0x130, 0x49, 0x131, 0x3d0, 0xdf, 0xfb03, 0xd93f, 0xdfff },
426 foldedDefault[]= { 0x61, 0x62, 0x69, 0x307, 0x69, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff },
427 foldedExcludeSpecialI[]={ 0x61, 0x62, 0x69, 0x131, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff };
428
429 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
430
431 const UChar32 *p;
432 int32_t i;
433
434 UChar buffer[32];
435 int32_t length;
436 UErrorCode errorCode;
437 UBool isUnicode_3_1;
438
439 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
440 u_getUnicodeVersion(unicodeVersion);
441 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
442
443 /* test simple case folding */
444 p=simple;
445 for(i=0; i<(int32_t)sizeof(simple)/12; p+=3, ++i) {
446 if(u_foldCase(p[0], U_FOLD_CASE_DEFAULT)!=p[1]) {
447 log_err("error: u_foldCase(0x%04lx, default)=0x%04lx instead of 0x%04lx\n",
448 p[0], u_foldCase(p[0], U_FOLD_CASE_DEFAULT), p[1]);
449 return;
450 }
451
452 if(isUnicode_3_1 && u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I)!=p[2]) {
453 log_err("error: u_foldCase(0x%04lx, exclude special i)=0x%04lx instead of 0x%04lx\n",
454 p[0], u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I), p[2]);
455 return;
456 }
457 }
458
459 /* test full string case folding with default option and separate buffers */
460 buffer[0]=0xabcd;
461 errorCode=U_ZERO_ERROR;
462 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
463 mixed, UPRV_LENGTHOF(mixed),
464 U_FOLD_CASE_DEFAULT,
465 &errorCode);
466 if( U_FAILURE(errorCode) ||
467 length!=(UPRV_LENGTHOF(foldedDefault)) ||
468 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
469 buffer[length]!=0
470 ) {
471 log_err("error in u_strFoldCase(default)=%ld error=%s string matches: %s\n",
472 length,
473 u_errorName(errorCode),
474 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
475 }
476
477 /* exclude special i */
478 if(isUnicode_3_1) {
479 buffer[0]=0xabcd;
480 errorCode=U_ZERO_ERROR;
481 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
482 mixed, UPRV_LENGTHOF(mixed),
483 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
484 &errorCode);
485 if( U_FAILURE(errorCode) ||
486 length!=(UPRV_LENGTHOF(foldedExcludeSpecialI)) ||
487 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
488 buffer[length]!=0
489 ) {
490 log_err("error in u_strFoldCase(exclude special i)=%ld error=%s string matches: %s\n",
491 length,
492 u_errorName(errorCode),
493 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
494 }
495 }
496
497 /* test full string case folding with default option and in the same buffer */
498 uprv_memcpy(buffer, mixed, sizeof(mixed));
499 buffer[UPRV_LENGTHOF(mixed)]=0;
500 errorCode=U_ZERO_ERROR;
501 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
502 buffer, -1, /* implicit srcLength */
503 U_FOLD_CASE_DEFAULT,
504 &errorCode);
505 if( U_FAILURE(errorCode) ||
506 length!=(UPRV_LENGTHOF(foldedDefault)) ||
507 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
508 buffer[length]!=0
509 ) {
510 log_err("error in u_strFoldCase(default same buffer)=%ld error=%s string matches: %s\n",
511 length,
512 u_errorName(errorCode),
513 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
514 }
515
516 /* test full string case folding, exclude special i, in the same buffer */
517 if(isUnicode_3_1) {
518 uprv_memcpy(buffer, mixed, sizeof(mixed));
519 errorCode=U_ZERO_ERROR;
520 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
521 buffer, UPRV_LENGTHOF(mixed),
522 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
523 &errorCode);
524 if( U_FAILURE(errorCode) ||
525 length!=UPRV_LENGTHOF(foldedExcludeSpecialI) ||
526 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
527 buffer[length]!=0
528 ) {
529 log_err("error in u_strFoldCase(exclude special i same buffer)=%ld error=%s string matches: %s\n",
530 length,
531 u_errorName(errorCode),
532 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
533 }
534 }
535
536 /* test preflighting */
537 buffer[0]=buffer[2]=0xabcd;
538 errorCode=U_ZERO_ERROR;
539 length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */
540 mixed, UPRV_LENGTHOF(mixed),
541 U_FOLD_CASE_DEFAULT,
542 &errorCode);
543 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
544 length!=UPRV_LENGTHOF(foldedDefault) ||
545 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
546 buffer[2]!=0xabcd
547 ) {
548 log_err("error in u_strFoldCase(default preflighting)=%ld error=%s string matches: %s\n",
549 length,
550 u_errorName(errorCode),
551 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
552 }
553
554 errorCode=U_ZERO_ERROR;
555 length=u_strFoldCase(NULL, 0,
556 mixed, UPRV_LENGTHOF(mixed),
557 U_FOLD_CASE_DEFAULT,
558 &errorCode);
559 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
560 length!=UPRV_LENGTHOF(foldedDefault)
561 ) {
562 log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\n",
563 length,
564 u_errorName(errorCode));
565 }
566
567 /* test error handling */
568 errorCode=U_ZERO_ERROR;
569 length=u_strFoldCase(NULL, UPRV_LENGTHOF(buffer),
570 mixed, UPRV_LENGTHOF(mixed),
571 U_FOLD_CASE_DEFAULT,
572 &errorCode);
573 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
574 log_err("error in u_strFoldCase(default dest=NULL)=%ld error=%s\n",
575 length,
576 u_errorName(errorCode));
577 }
578
579 buffer[0]=0xabcd;
580 errorCode=U_ZERO_ERROR;
581 length=u_strFoldCase(buffer, -1,
582 mixed, UPRV_LENGTHOF(mixed),
583 U_FOLD_CASE_DEFAULT,
584 &errorCode);
585 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
586 buffer[0]!=0xabcd
587 ) {
588 log_err("error in u_strFoldCase(default destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
589 length,
590 u_errorName(errorCode),
591 buffer[0]);
592 }
593
594 buffer[0]=0xabcd;
595 errorCode=U_ZERO_ERROR;
596 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
597 NULL, UPRV_LENGTHOF(mixed),
598 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
599 &errorCode);
600 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
601 buffer[0]!=0xabcd
602 ) {
603 log_err("error in u_strFoldCase(exclude special i src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
604 length,
605 u_errorName(errorCode),
606 buffer[0]);
607 }
608
609 buffer[0]=0xabcd;
610 errorCode=U_ZERO_ERROR;
611 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
612 mixed, -2,
613 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
614 &errorCode);
615 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
616 buffer[0]!=0xabcd
617 ) {
618 log_err("error in u_strFoldCase(exclude special i srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
619 length,
620 u_errorName(errorCode),
621 buffer[0]);
622 }
623 }
624
625 static void
TestCaseCompare(void)626 TestCaseCompare(void) {
627 static const UChar
628
629 mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0xfb03, 0xd93f, 0xdfff, 0 },
630 otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
631 otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
632 different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
633
634 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
635
636 int32_t result, lenMixed, lenOtherDefault, lenOtherExcludeSpecialI, lenDifferent;
637 UErrorCode errorCode;
638 UBool isUnicode_3_1;
639
640 errorCode=U_ZERO_ERROR;
641
642 lenMixed=u_strlen(mixed);
643 lenOtherDefault=u_strlen(otherDefault);
644 (void)lenOtherDefault; /* Suppress set but not used warning. */
645 lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI);
646 lenDifferent=u_strlen(different);
647
648 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
649 u_getUnicodeVersion(unicodeVersion);
650 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
651 (void)isUnicode_3_1; /* Suppress set but not used warning. */
652
653 /* test u_strcasecmp() */
654 result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT);
655 if(result!=0) {
656 log_err("error: u_strcasecmp(mixed, other, default)=%ld instead of 0\n", result);
657 }
658 result=u_strCaseCompare(mixed, -1, otherDefault, -1, U_FOLD_CASE_DEFAULT, &errorCode);
659 if(result!=0) {
660 log_err("error: u_strCaseCompare(mixed, other, default)=%ld instead of 0\n", result);
661 }
662
663 /* test u_strcasecmp() - exclude special i */
664 result=u_strcasecmp(mixed, otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
665 if(result!=0) {
666 log_err("error: u_strcasecmp(mixed, other, exclude special i)=%ld instead of 0\n", result);
667 }
668 result=u_strCaseCompare(mixed, lenMixed, otherExcludeSpecialI, lenOtherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
669 if(result!=0) {
670 log_err("error: u_strCaseCompare(mixed, other, exclude special i)=%ld instead of 0\n", result);
671 }
672
673 /* test u_strcasecmp() */
674 result=u_strcasecmp(mixed, different, U_FOLD_CASE_DEFAULT);
675 if(result<=0) {
676 log_err("error: u_strcasecmp(mixed, different, default)=%ld instead of positive\n", result);
677 }
678 result=u_strCaseCompare(mixed, -1, different, lenDifferent, U_FOLD_CASE_DEFAULT, &errorCode);
679 if(result<=0) {
680 log_err("error: u_strCaseCompare(mixed, different, default)=%ld instead of positive\n", result);
681 }
682
683 /* test u_strncasecmp() - stop before the sharp s (U+00df) */
684 result=u_strncasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
685 if(result!=0) {
686 log_err("error: u_strncasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
687 }
688 result=u_strCaseCompare(mixed, 4, different, 4, U_FOLD_CASE_DEFAULT, &errorCode);
689 if(result!=0) {
690 log_err("error: u_strCaseCompare(mixed, 4, different, 4, default)=%ld instead of 0\n", result);
691 }
692
693 /* test u_strncasecmp() - stop in the middle of the sharp s (U+00df) */
694 result=u_strncasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
695 if(result<=0) {
696 log_err("error: u_strncasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
697 }
698 result=u_strCaseCompare(mixed, 5, different, 5, U_FOLD_CASE_DEFAULT, &errorCode);
699 if(result<=0) {
700 log_err("error: u_strCaseCompare(mixed, 5, different, 5, default)=%ld instead of positive\n", result);
701 }
702
703 /* test u_memcasecmp() - stop before the sharp s (U+00df) */
704 result=u_memcasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
705 if(result!=0) {
706 log_err("error: u_memcasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
707 }
708
709 /* test u_memcasecmp() - stop in the middle of the sharp s (U+00df) */
710 result=u_memcasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
711 if(result<=0) {
712 log_err("error: u_memcasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
713 }
714 }
715
716 /* test UCaseMap ------------------------------------------------------------ */
717
718 /*
719 * API test for UCaseMap;
720 * test cases for actual case mappings using UCaseMap see
721 * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing
722 */
723 static void
TestUCaseMap(void)724 TestUCaseMap(void) {
725 static const char
726 aBc[] ={ 0x61, 0x42, 0x63, 0 },
727 abc[] ={ 0x61, 0x62, 0x63, 0 },
728 ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 },
729 defg[]={ 0x64, 0x65, 0x66, 0x67, 0 };
730 char utf8Out[8];
731
732 UCaseMap *csm;
733 const char *locale;
734 uint32_t options;
735 int32_t length;
736 UErrorCode errorCode;
737
738 errorCode=U_ZERO_ERROR;
739 csm=ucasemap_open("tur", 0xa5, &errorCode);
740 if(U_FAILURE(errorCode)) {
741 log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode));
742 return;
743 }
744 locale=ucasemap_getLocale(csm);
745 if(0!=strcmp(locale, "tr")) {
746 log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
747 }
748 /* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
749 ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
750 locale=ucasemap_getLocale(csm);
751 // "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized
752 // into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog"
753 // and "the" will be treated as an extlang which replaces "tlh".
754 if(0!=strncmp(locale, "the", 3)) {
755 log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
756 " does not start with \"the\"\n", locale);
757 }
758
759 errorCode=U_ZERO_ERROR;
760 options=ucasemap_getOptions(csm);
761 if(options!=0xa5) {
762 log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long)options);
763 }
764 ucasemap_setOptions(csm, 0x333333, &errorCode);
765 options=ucasemap_getOptions(csm);
766 if(options!=0x333333) {
767 log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x333333\n", (long)options);
768 }
769
770 /* test case mapping API; not all permutations necessary due to shared implementation code */
771
772 /* NUL terminated source */
773 errorCode=U_ZERO_ERROR;
774 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
775 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
776 log_err("ucasemap_utf8ToLower(aBc\\0) failed\n");
777 }
778
779 /* incoming failure code */
780 errorCode=U_PARSE_ERROR;
781 strcpy(utf8Out, defg);
782 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
783 if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) {
784 log_err("ucasemap_utf8ToLower(failure) failed\n");
785 }
786
787 /* overlapping input & output */
788 errorCode=U_ZERO_ERROR;
789 strcpy(utf8Out, aBc);
790 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode);
791 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
792 log_err("ucasemap_utf8ToUpper(overlap 1) failed\n");
793 }
794
795 /* overlap in the other direction */
796 errorCode=U_ZERO_ERROR;
797 strcpy(utf8Out, aBc);
798 length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode);
799 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
800 log_err("ucasemap_utf8ToUpper(overlap 2) failed\n");
801 }
802
803 /* NULL destination */
804 errorCode=U_ZERO_ERROR;
805 strcpy(utf8Out, defg);
806 length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
807 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
808 log_err("ucasemap_utf8ToLower(dest=NULL) failed\n");
809 }
810
811 /* destCapacity<0 */
812 errorCode=U_ZERO_ERROR;
813 strcpy(utf8Out, defg);
814 length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode);
815 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
816 log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n");
817 }
818
819 /* NULL source */
820 errorCode=U_ZERO_ERROR;
821 strcpy(utf8Out, defg);
822 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1, &errorCode);
823 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
824 log_err("ucasemap_utf8ToLower(src=NULL) failed\n");
825 }
826
827 /* srcLength<-1 */
828 errorCode=U_ZERO_ERROR;
829 strcpy(utf8Out, defg);
830 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2, &errorCode);
831 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
832 log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n");
833 }
834
835 /* buffer overflow */
836 errorCode=U_ZERO_ERROR;
837 strcpy(utf8Out, defg);
838 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode);
839 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8Out+2)) {
840 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
841 }
842
843 /* dest not terminated (leaves g from defg alone) */
844 errorCode=U_ZERO_ERROR;
845 strcpy(utf8Out, defg);
846 length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode);
847 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg, utf8Out)) {
848 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
849 }
850
851 /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
852 errorCode=U_ZERO_ERROR;
853 utf8Out[0]=0;
854 length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode);
855 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
856 log_err("ucasemap_utf8FoldCase(aBc) failed\n");
857 }
858
859 ucasemap_close(csm);
860 }
861
862 #if !UCONFIG_NO_BREAK_ITERATION
863
864 /* Try titlecasing with options. */
865 static void
TestUCaseMapToTitle(void)866 TestUCaseMapToTitle(void) {
867 /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
868 /*
869 * Note: The sentence BreakIterator does not recognize a '.'
870 * as a sentence terminator if it is followed by lowercase.
871 * That is why the example has the '!'.
872 */
873 static const UChar
874
875 beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
876 titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
877 titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
878 titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };
879
880 UChar buffer[32];
881 UCaseMap *csm;
882 UBreakIterator *sentenceIter;
883 const UBreakIterator *iter;
884 int32_t length;
885 UErrorCode errorCode;
886
887 errorCode=U_ZERO_ERROR;
888 csm=ucasemap_open("", 0, &errorCode);
889 if(U_FAILURE(errorCode)) {
890 log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
891 return;
892 }
893
894 iter=ucasemap_getBreakIterator(csm);
895 if(iter!=NULL) {
896 log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
897 }
898
899 /* Use default UBreakIterator: Word breaks. */
900 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
901 if( U_FAILURE(errorCode) ||
902 length!=UPRV_LENGTHOF(titleWord) ||
903 0!=u_memcmp(buffer, titleWord, length) ||
904 buffer[length]!=0
905 ) {
906 log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
907 }
908 if (U_SUCCESS(errorCode)) {
909 iter=ucasemap_getBreakIterator(csm);
910 if(iter==NULL) {
911 log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
912 }
913 }
914
915 /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
916 ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
917 if(U_FAILURE(errorCode)) {
918 log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
919 ucasemap_close(csm);
920 return;
921 }
922
923 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
924 if( U_FAILURE(errorCode) ||
925 length!=UPRV_LENGTHOF(titleWordNoAdjust) ||
926 0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
927 buffer[length]!=0
928 ) {
929 log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
930 }
931
932 /* Set a sentence break iterator. */
933 errorCode=U_ZERO_ERROR;
934 sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
935 if(U_FAILURE(errorCode)) {
936 log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
937 ucasemap_close(csm);
938 return;
939 }
940 ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
941 if(U_FAILURE(errorCode)) {
942 log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
943 ubrk_close(sentenceIter);
944 ucasemap_close(csm);
945 return;
946 }
947 iter=ucasemap_getBreakIterator(csm);
948 if(iter!=sentenceIter) {
949 log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
950 }
951
952 ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
953 if(U_FAILURE(errorCode)) {
954 log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
955 ucasemap_close(csm);
956 return;
957 }
958
959 /* Use the sentence break iterator with the option. Preflight first. */
960 length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
961 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
962 length!=UPRV_LENGTHOF(titleSentNoLower)
963 ) {
964 log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
965 }
966
967 errorCode=U_ZERO_ERROR;
968 buffer[0]=0;
969 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
970 if( U_FAILURE(errorCode) ||
971 length!=UPRV_LENGTHOF(titleSentNoLower) ||
972 0!=u_memcmp(buffer, titleSentNoLower, length) ||
973 buffer[length]!=0
974 ) {
975 log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
976 }
977
978 /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
979 {
980 char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
981 int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;
982
983 errorCode=U_ZERO_ERROR;
984 u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
985 u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode);
986
987 length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
988 if( U_FAILURE(errorCode) ||
989 length!=utf8TitleSentNoLowerLength ||
990 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
991 utf8[length]!=0
992 ) {
993 log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
994 }
995 }
996
997 ucasemap_close(csm);
998 }
999
1000 #endif
1001
1002 /* Test case for internal API u_caseInsensitivePrefixMatch */
1003 static void
TestUCaseInsensitivePrefixMatch(void)1004 TestUCaseInsensitivePrefixMatch(void) {
1005 struct {
1006 const char *s1;
1007 const char *s2;
1008 int32_t r1;
1009 int32_t r2;
1010 } testCases[] = {
1011 {"ABC", "ab", 2, 2},
1012 {"ABCD", "abcx", 3, 3},
1013 {"ABC", "xyz", 0, 0},
1014 /* U+00DF LATIN SMALL LETTER SHARP S */
1015 {"A\\u00dfBC", "Ass", 2, 3},
1016 {"Fust", "Fu\\u00dfball", 2, 2},
1017 {"\\u00dfsA", "s\\u00dfB", 2, 2},
1018 {"\\u00dfs", "s\\u00df", 2, 2},
1019 /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */
1020 {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6},
1021 {0, 0, 0, 0}
1022 };
1023 int32_t i;
1024
1025 for (i = 0; testCases[i].s1 != 0; i++) {
1026 UErrorCode sts = U_ZERO_ERROR;
1027 UChar u1[64], u2[64];
1028 int32_t matchLen1, matchLen2;
1029
1030 u_unescape(testCases[i].s1, u1, 64);
1031 u_unescape(testCases[i].s2, u2, 64);
1032
1033 u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts);
1034 if (U_FAILURE(sts)) {
1035 log_err("error: %s, s1=%s, s2=%s", u_errorName(sts), testCases[i].s1, testCases[i].s2);
1036 } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) {
1037 log_err("s1=%s, s2=%2 / match len1=%d, len2=%d / expected len1=%d, len2=%d",
1038 testCases[i].s1, testCases[i].s2,
1039 matchLen1, matchLen2,
1040 testCases[i].r1, testCases[i].r2);
1041 }
1042 }
1043 }
1044
1045 void addCaseTest(TestNode** root);
1046
addCaseTest(TestNode ** root)1047 void addCaseTest(TestNode** root) {
1048 /* cstrcase.c functions, declared in cucdtst.h */
1049 addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower");
1050 addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper");
1051 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1052 addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle");
1053 addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle");
1054 #endif
1055 addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding");
1056 addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare");
1057 addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap");
1058 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1059 addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle");
1060 #endif
1061 addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch");
1062 }
1063