• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  strcase.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002mar12
16 *   created by: Markus W. Scherer
17 *
18 *   Test file for string casing C++ API functions.
19 */
20 
21 #include "unicode/std_string.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ures.h"
27 #include "unicode/uloc.h"
28 #include "unicode/locid.h"
29 #include "unicode/ubrk.h"
30 #include "unicode/unistr.h"
31 #include "unicode/ucasemap.h"
32 #include "unicode/ustring.h"
33 #include "ucase.h"
34 #include "ustrtest.h"
35 #include "unicode/tstdtmod.h"
36 #include "cmemory.h"
37 #include "testutil.h"
38 
39 class StringCaseTest: public IntlTest {
40 public:
41     StringCaseTest();
42     virtual ~StringCaseTest();
43 
44     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
45 
46     void TestCaseConversion();
47 
48     void TestCasingImpl(const UnicodeString &input,
49                         const UnicodeString &output,
50                         int32_t whichCase,
51                         void *iter, const char *localeID, uint32_t options);
52     void TestCasing();
53     void TestTitleOptions();
54     void TestFullCaseFoldingIterator();
55     void TestGreekUpper();
56     void TestLongUpper();
57     void TestMalformedUTF8();
58     void TestBufferOverflow();
59     void TestEdits();
60     void TestCopyMoveEdits();
61     void TestEditsFindFwdBwd();
62     void TestMergeEdits();
63     void TestCaseMapWithEdits();
64     void TestCaseMapUTF8WithEdits();
65     void TestCaseMapToString();
66     void TestCaseMapUTF8ToString();
67     void TestLongUnicodeString();
68     void TestBug13127();
69     void TestInPlaceTitle();
70 
71 private:
72     void assertGreekUpper(const char16_t *s, const char16_t *expected);
73 
74     Locale GREEK_LOCALE_;
75 };
76 
StringCaseTest()77 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
78 
~StringCaseTest()79 StringCaseTest::~StringCaseTest() {}
80 
createStringCaseTest()81 extern IntlTest *createStringCaseTest() {
82     return new StringCaseTest();
83 }
84 
85 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)86 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
87     if(exec) {
88         logln("TestSuite StringCaseTest: ");
89     }
90     TESTCASE_AUTO_BEGIN;
91     TESTCASE_AUTO(TestCaseConversion);
92 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
93     TESTCASE_AUTO(TestCasing);
94     TESTCASE_AUTO(TestTitleOptions);
95 #endif
96     TESTCASE_AUTO(TestFullCaseFoldingIterator);
97     TESTCASE_AUTO(TestGreekUpper);
98     TESTCASE_AUTO(TestLongUpper);
99     TESTCASE_AUTO(TestMalformedUTF8);
100     TESTCASE_AUTO(TestBufferOverflow);
101     TESTCASE_AUTO(TestEdits);
102     TESTCASE_AUTO(TestCopyMoveEdits);
103     TESTCASE_AUTO(TestEditsFindFwdBwd);
104     TESTCASE_AUTO(TestMergeEdits);
105     TESTCASE_AUTO(TestCaseMapWithEdits);
106     TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
107     TESTCASE_AUTO(TestCaseMapToString);
108     TESTCASE_AUTO(TestCaseMapUTF8ToString);
109     TESTCASE_AUTO(TestLongUnicodeString);
110 #if !UCONFIG_NO_BREAK_ITERATION
111     TESTCASE_AUTO(TestBug13127);
112     TESTCASE_AUTO(TestInPlaceTitle);
113 #endif
114     TESTCASE_AUTO_END;
115 }
116 
117 void
TestCaseConversion()118 StringCaseTest::TestCaseConversion()
119 {
120     static const UChar uppercaseGreek[] =
121         { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
122         0x39f, 0x3a3, 0 };
123         // "IESUS CHRISTOS"
124 
125     static const UChar lowercaseGreek[] =
126         { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
127         0x3bf, 0x3c2, 0 };
128         // "iesus christos"
129 
130     static const UChar lowercaseTurkish[] =
131         { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
132         0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
133 
134     static const UChar uppercaseTurkish[] =
135         { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
136         0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
137 
138     UnicodeString expectedResult;
139     UnicodeString   test3;
140 
141     test3 += (UChar32)0x0130;
142     test3 += "STANBUL, NOT CONSTANTINOPLE!";
143 
144     UnicodeString   test4(test3);
145     test4.toLower(Locale(""));
146     expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
147     if (test4 != expectedResult)
148         errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
149 
150     test4 = test3;
151     test4.toLower(Locale("tr", "TR"));
152     expectedResult = lowercaseTurkish;
153     if (test4 != expectedResult)
154         errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
155 
156     test3 = "topkap";
157     test3 += (UChar32)0x0131;
158     test3 += " palace, istanbul";
159     test4 = test3;
160 
161     test4.toUpper(Locale(""));
162     expectedResult = "TOPKAPI PALACE, ISTANBUL";
163     if (test4 != expectedResult)
164         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
165 
166     test4 = test3;
167     test4.toUpper(Locale("tr", "TR"));
168     expectedResult = uppercaseTurkish;
169     if (test4 != expectedResult)
170         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
171 
172     test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
173 
174     test3.toUpper(Locale("de", "DE"));
175     expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
176     if (test3 != expectedResult)
177         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
178 
179     test4.replace(0, test4.length(), uppercaseGreek);
180 
181     test4.toLower(Locale("el", "GR"));
182     expectedResult = lowercaseGreek;
183     if (test4 != expectedResult)
184         errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
185 
186     test4.replace(0, test4.length(), lowercaseGreek);
187 
188     test4.toUpper();
189     expectedResult = uppercaseGreek;
190     if (test4 != expectedResult)
191         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
192 
193     // more string case mapping tests with the new implementation
194     {
195         static const UChar
196 
197         beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
198         lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
199         lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
200 
201         beforeUpper[]= { 0x61, 0x42, 0x69,  0x3c2, 0xdf,       0x3c3, 0x2f, 0xfb03,           0xfb03,           0xfb03,           0xd93f, 0xdfff },
202         upperRoot[]=   { 0x41, 0x42, 0x49,  0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
203         upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
204 
205         beforeMiniUpper[]=  { 0xdf, 0x61 },
206         miniUpper[]=        { 0x53, 0x53, 0x41 };
207 
208         UnicodeString s;
209 
210         /* lowercase with root locale */
211         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
212         s.toLower("");
213         if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
214             s!=UnicodeString(FALSE, lowerRoot, s.length())
215         ) {
216             errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
217         }
218 
219         /* lowercase with turkish locale */
220         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
221         s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
222         if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
223             s!=UnicodeString(FALSE, lowerTurkish, s.length())
224         ) {
225             errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
226         }
227 
228         /* uppercase with root locale */
229         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
230         s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
231         if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
232             s!=UnicodeString(FALSE, upperRoot, s.length())
233         ) {
234             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
235         }
236 
237         /* uppercase with turkish locale */
238         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
239         s.toUpper(Locale("tr"));
240         if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
241             s!=UnicodeString(FALSE, upperTurkish, s.length())
242         ) {
243             errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
244         }
245 
246         /* uppercase a short string with root locale */
247         s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
248         s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
249         if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
250             s!=UnicodeString(FALSE, miniUpper, s.length())
251         ) {
252             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
253         }
254     }
255 
256     // test some supplementary characters (>= Unicode 3.1)
257     {
258         UnicodeString t;
259 
260         UnicodeString
261             deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
262             deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
263             deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
264         (t=deseretInput).toLower();
265         if(t!=deseretLower) {
266             errln("error lowercasing Deseret (plane 1) characters");
267         }
268         (t=deseretInput).toUpper();
269         if(t!=deseretUpper) {
270             errln("error uppercasing Deseret (plane 1) characters");
271         }
272     }
273 
274     // test some more cases that looked like problems
275     {
276         UnicodeString t;
277 
278         UnicodeString
279             ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
280             ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
281             ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
282         (t=ljInput).toLower("en");
283         if(t!=ljLower) {
284             errln("error lowercasing LJ characters");
285         }
286         (t=ljInput).toUpper("en");
287         if(t!=ljUpper) {
288             errln("error uppercasing LJ characters");
289         }
290     }
291 
292 #if !UCONFIG_NO_NORMALIZATION
293     // some context-sensitive casing depends on normalization data being present
294 
295     // Unicode 3.1.1 SpecialCasing tests
296     {
297         UnicodeString t;
298 
299         // sigmas preceded and/or followed by cased letters
300         UnicodeString
301             sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
302             sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
303             sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
304 
305         (t=sigmas).toLower();
306         if(t!=sigmasLower) {
307             errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
308         }
309 
310         (t=sigmas).toUpper(Locale(""));
311         if(t!=sigmasUpper) {
312             errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
313         }
314 
315         // turkish & azerbaijani dotless i & dotted I
316         // remove dot above if there was a capital I before and there are no more accents above
317         UnicodeString
318             dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
319             dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
320             dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
321 
322         (t=dots).toLower("tr");
323         if(t!=dotsTurkish) {
324             errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
325         }
326 
327         (t=dots).toLower("de");
328         if(t!=dotsDefault) {
329             errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
330         }
331     }
332 
333     // more Unicode 3.1.1 tests
334     {
335         UnicodeString t;
336 
337         // lithuanian dot above in uppercasing
338         UnicodeString
339             dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
340             dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
341             dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
342 
343         (t=dots).toUpper("lt");
344         if(t!=dotsLithuanian) {
345             errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
346         }
347 
348         (t=dots).toUpper("de");
349         if(t!=dotsDefault) {
350             errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
351         }
352 
353         // lithuanian adds dot above to i in lowercasing if there are more above accents
354         UnicodeString
355             i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
356             iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
357             iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
358 
359         (t=i).toLower("lt");
360         if(t!=iLithuanian) {
361             errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
362         }
363 
364         (t=i).toLower("de");
365         if(t!=iDefault) {
366             errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
367         }
368     }
369 
370 #endif
371 
372     // test case folding
373     {
374         UnicodeString
375             s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
376             f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
377             g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
378             t;
379 
380         (t=s).foldCase();
381         if(f!=t) {
382             errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
383         }
384 
385         // alternate handling for dotted I/dotless i (U+0130, U+0131)
386         (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
387         if(g!=t) {
388             errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
389         }
390     }
391 }
392 
393 // data-driven case mapping tests ------------------------------------------ ***
394 
// Selectors for the four data-driven test groups. The values double as indexes
// into dataNames[]; TEST_COUNT is the number of groups.
enum {
    TEST_LOWER = 0,
    TEST_UPPER,
    TEST_TITLE,
    TEST_FOLD,
    TEST_COUNT
};

// names of TestData children in casing.txt, indexed by the TEST_* constants;
// the one extra slot at index TEST_COUNT holds an empty string
static const char *const dataNames[TEST_COUNT+1]={
    "lowercasing",  // TEST_LOWER
    "uppercasing",  // TEST_UPPER
    "titlecasing",  // TEST_TITLE
    "casefolding",  // TEST_FOLD
    ""
};
411 
412 void
TestCasingImpl(const UnicodeString & input,const UnicodeString & output,int32_t whichCase,void * iter,const char * localeID,uint32_t options)413 StringCaseTest::TestCasingImpl(const UnicodeString &input,
414                                const UnicodeString &output,
415                                int32_t whichCase,
416                                void *iter, const char *localeID, uint32_t options) {
417     // UnicodeString
418     UnicodeString result;
419     const char *name;
420     Locale locale(localeID);
421 
422     result=input;
423     switch(whichCase) {
424     case TEST_LOWER:
425         name="toLower";
426         result.toLower(locale);
427         break;
428     case TEST_UPPER:
429         name="toUpper";
430         result.toUpper(locale);
431         break;
432 #if !UCONFIG_NO_BREAK_ITERATION
433     case TEST_TITLE:
434         name="toTitle";
435         result.toTitle((BreakIterator *)iter, locale, options);
436         break;
437 #endif
438     case TEST_FOLD:
439         name="foldCase";
440         result.foldCase(options);
441         break;
442     default:
443         name="";
444         break; // won't happen
445     }
446     if(result!=output) {
447         dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
448     }
449 #if !UCONFIG_NO_BREAK_ITERATION
450     if(whichCase==TEST_TITLE && options==0) {
451         result=input;
452         result.toTitle((BreakIterator *)iter, locale);
453         if(result!=output) {
454             dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
455         }
456     }
457 #endif
458 
459     // UTF-8
460     char utf8In[100], utf8Out[100];
461     int32_t utf8InLength, utf8OutLength, resultLength;
462     UChar *buffer;
463 
464     IcuTestErrorCode errorCode(*this, "TestCasingImpl");
465     LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
466 #if !UCONFIG_NO_BREAK_ITERATION
467     if(iter!=NULL) {
468         // Clone the break iterator so that the UCaseMap can safely adopt it.
469         UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
470         ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
471     }
472 #endif
473 
474     u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
475     switch(whichCase) {
476     case TEST_LOWER:
477         name="ucasemap_utf8ToLower";
478         utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
479                     utf8Out, (int32_t)sizeof(utf8Out),
480                     utf8In, utf8InLength, errorCode);
481         break;
482     case TEST_UPPER:
483         name="ucasemap_utf8ToUpper";
484         utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
485                     utf8Out, (int32_t)sizeof(utf8Out),
486                     utf8In, utf8InLength, errorCode);
487         break;
488 #if !UCONFIG_NO_BREAK_ITERATION
489     case TEST_TITLE:
490         name="ucasemap_utf8ToTitle";
491         utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
492                     utf8Out, (int32_t)sizeof(utf8Out),
493                     utf8In, utf8InLength, errorCode);
494         break;
495 #endif
496     case TEST_FOLD:
497         name="ucasemap_utf8FoldCase";
498         utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
499                     utf8Out, (int32_t)sizeof(utf8Out),
500                     utf8In, utf8InLength, errorCode);
501         break;
502     default:
503         name="";
504         utf8OutLength=0;
505         break; // won't happen
506     }
507     buffer=result.getBuffer(utf8OutLength);
508     u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
509     result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
510 
511     if(errorCode.isFailure()) {
512         errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
513         errorCode.reset();
514     } else if(result!=output) {
515         errln("error: %s() got a wrong result for a test case from casing.res", name);
516         errln("expected \"" + output + "\" got \"" + result + "\"" );
517     }
518 }
519 
520 void
TestCasing()521 StringCaseTest::TestCasing() {
522     UErrorCode status = U_ZERO_ERROR;
523 #if !UCONFIG_NO_BREAK_ITERATION
524     LocalUBreakIteratorPointer iter;
525 #endif
526     char cLocaleID[100];
527     UnicodeString locale, input, output, optionsString, result;
528     uint32_t options;
529     int32_t whichCase, type;
530     LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
531     if(U_SUCCESS(status)) {
532         for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
533 #if UCONFIG_NO_BREAK_ITERATION
534             if(whichCase==TEST_TITLE) {
535                 continue;
536             }
537 #endif
538             LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
539             if(U_FAILURE(status)) {
540                 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
541                 break;
542             }
543             const DataMap *myCase = NULL;
544             while(casingTest->nextCase(myCase, status)) {
545                 input = myCase->getString("Input", status);
546                 output = myCase->getString("Output", status);
547 
548                 if(whichCase!=TEST_FOLD) {
549                     locale = myCase->getString("Locale", status);
550                 }
551                 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
552 
553 #if !UCONFIG_NO_BREAK_ITERATION
554                 if(whichCase==TEST_TITLE) {
555                     type = myCase->getInt("Type", status);
556                     if(type>=0) {
557                         iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
558                     } else if(type==-2) {
559                         // Open a trivial break iterator that only delivers { 0, length }
560                         // or even just { 0 } as boundaries.
561                         static const UChar rules[] = { 0x2e, 0x2a, 0x3b };  // ".*;"
562                         UParseError parseError;
563                         iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
564                     }
565                 }
566 #endif
567                 options = 0;
568                 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
569                     optionsString = myCase->getString("Options", status);
570                     if(optionsString.indexOf((UChar)0x54)>=0) {  // T
571                         options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
572                     }
573                     if(optionsString.indexOf((UChar)0x4c)>=0) {  // L
574                         options|=U_TITLECASE_NO_LOWERCASE;
575                     }
576                     if(optionsString.indexOf((UChar)0x41)>=0) {  // A
577                         options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
578                     }
579                 }
580 
581                 if(U_FAILURE(status)) {
582                     dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase],  u_errorName(status));
583                     status = U_ZERO_ERROR;
584                 } else {
585 #if UCONFIG_NO_BREAK_ITERATION
586                     LocalPointer<UMemory> iter;
587 #endif
588                     TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
589                 }
590 
591 #if !UCONFIG_NO_BREAK_ITERATION
592                 iter.adoptInstead(NULL);
593 #endif
594             }
595         }
596     }
597 
598 #if !UCONFIG_NO_BREAK_ITERATION
599     // more tests for API coverage
600     status=U_ZERO_ERROR;
601     input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
602     (result=input).toTitle(NULL);
603     if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
604         dataerrln("UnicodeString::toTitle(NULL) failed.");
605     }
606 #endif
607 }
608 
609 void
TestTitleOptions()610 StringCaseTest::TestTitleOptions() {
611     // New options in ICU 60.
612     TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
613                    nullptr, "", U_TITLECASE_WHOLE_STRING);
614     TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
615                    nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
616     TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
617                    nullptr, "", U_TITLECASE_WHOLE_STRING);
618     TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
619                    nullptr, "", U_TITLECASE_WHOLE_STRING);
620     TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
621                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
622     TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
623                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
624     TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
625                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
626     TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
627                    nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
628     TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
629                    nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
630     TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
631                    nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
632 
633 #if !UCONFIG_NO_BREAK_ITERATION
634     // Test conflicting settings.
635     // If & when we add more options, then the ORed combinations may become
636     // indistinguishable from valid values.
637     IcuTestErrorCode errorCode(*this, "TestTitleOptions");
638     CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
639                      u"", 0, nullptr, 0, nullptr, errorCode);
640     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
641         errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
642               errorCode.errorName());
643     }
644     errorCode.reset();
645     CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
646                      u"", 0, nullptr, 0, nullptr, errorCode);
647     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
648         errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
649               errorCode.errorName());
650     }
651     errorCode.reset();
652     LocalPointer<BreakIterator> iter(
653         BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
654     CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
655                      u"", 0, nullptr, 0, nullptr, errorCode);
656     if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
657         errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
658               errorCode.errorName());
659     }
660     errorCode.reset();
661 #endif
662 }
663 
664 void
TestFullCaseFoldingIterator()665 StringCaseTest::TestFullCaseFoldingIterator() {
666     UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
667     UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
668     FullCaseFoldingIterator iter;
669     int32_t count=0;
670     int32_t countSpecific=0;
671     UChar32 c;
672     UnicodeString full;
673     while((c=iter.next(full))>=0) {
674         ++count;
675         // Check that the full Case_Folding has more than 1 code point.
676         if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
677             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
678             continue;
679         }
680         // Check that full == Case_Folding(c).
681         UnicodeString cf(c);
682         cf.foldCase();
683         if(full!=cf) {
684             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
685             continue;
686         }
687         // Spot-check a couple of specific cases.
688         if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
689             ++countSpecific;
690         }
691     }
692     if(countSpecific!=3) {
693         errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
694     }
695     if(count<70) {
696         errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
697     }
698 }
699 
700 void
assertGreekUpper(const char16_t * s,const char16_t * expected)701 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
702     UnicodeString s16(s);
703     UnicodeString expected16(expected);
704     UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
705     UnicodeString result16(s16);
706     result16.toUpper(GREEK_LOCALE_);
707     assertEquals(msg, expected16, result16);
708 
709     msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
710     int32_t length = expected16.length();
711     int32_t capacities[] = {
712         // Keep in sync with the UTF-8 capacities near the bottom of this function.
713         0, length / 2, length - 1, length, length + 1
714     };
715     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
716         int32_t cap = capacities[i];
717         UChar *dest16 = result16.getBuffer(expected16.length() + 1);
718         u_memset(dest16, 0x55AA, result16.getCapacity());
719         UErrorCode errorCode = U_ZERO_ERROR;
720         length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
721         assertEquals(msg + cap, expected16.length(), length);
722         UErrorCode expectedErrorCode;
723         if (cap < expected16.length()) {
724             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
725         } else if (cap == expected16.length()) {
726             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
727         } else {
728             expectedErrorCode = U_ZERO_ERROR;
729             assertEquals(msg + cap + " NUL", 0, dest16[length]);
730         }
731         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
732         result16.releaseBuffer(length);
733         if (cap >= expected16.length()) {
734             assertEquals(msg + cap, expected16, result16);
735         }
736     }
737 
738     UErrorCode errorCode = U_ZERO_ERROR;
739     LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
740     assertSuccess("ucasemap_open", errorCode);
741     std::string s8;
742     s16.toUTF8String(s8);
743     msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
744     char dest8[1000];
745     length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
746                                   s8.data(), s8.length(), &errorCode);
747     assertSuccess("ucasemap_utf8ToUpper", errorCode);
748     StringPiece result8(dest8, length);
749     UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
750     assertEquals(msg, expected16, result16From8);
751 
752     msg += " cap=";
753     capacities[1] = length / 2;
754     capacities[2] = length - 1;
755     capacities[3] = length;
756     capacities[4] = length + 1;
757     char dest8b[1000];
758     int32_t expected8Length = length;  // Assuming the previous call worked.
759     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
760         int32_t cap = capacities[i];
761         memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
762         UErrorCode errorCode = U_ZERO_ERROR;
763         length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
764                                       s8.data(), s8.length(), &errorCode);
765         assertEquals(msg + cap, expected8Length, length);
766         UErrorCode expectedErrorCode;
767         if (cap < expected8Length) {
768             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
769         } else if (cap == expected8Length) {
770             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
771         } else {
772             expectedErrorCode = U_ZERO_ERROR;
773             // Casts to int32_t to avoid matching UBool.
774             assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
775         }
776         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
777         if (cap >= expected8Length) {
778             assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
779         }
780     }
781 }
782 
783 void
TestGreekUpper()784 StringCaseTest::TestGreekUpper() {
785     // http://bugs.icu-project.org/trac/ticket/5456
786     assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
787     // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
788     // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
789     assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
790     assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
791     assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
792     assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
793     assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
794     assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
795     assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
796     // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
797     assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
798     assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
799     // http://unicode.org/udhr/d/udhr_ell_polytonic.html
800     assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
801     assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
802     // From Google bug report
803     assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
804     // http://crbug.com/234797
805     assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
806     assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
807     assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
808     // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
809     assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
810     assertGreekUpper(u"ή.", u"Ή.");
811 }
812 
813 void
TestLongUpper()814 StringCaseTest::TestLongUpper() {
815     if (quick) {
816         logln("not exhaustive mode: skipping this test");
817         return;
818     }
819     // Ticket #12663, crash with an extremely long string where
820     // U+0390 maps to 0399 0308 0301 so that the result is three times as long
821     // and overflows an int32_t.
822     int32_t length = 0x40000004;  // more than 1G UChars
823     UnicodeString s(length, (UChar32)0x390, length);
824     UnicodeString result;
825     UChar *dest = result.getBuffer(length + 1);
826     if (s.isBogus() || dest == NULL) {
827         logln("Out of memory, unable to run this test on this machine.");
828         return;
829     }
830     IcuTestErrorCode errorCode(*this, "TestLongUpper");
831     int32_t destLength = u_strToUpper(dest, result.getCapacity(),
832                                       s.getBuffer(), s.length(), "", errorCode);
833     result.releaseBuffer(destLength);
834     if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
835         errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
836               errorCode.errorName(), (long)destLength);
837     }
838 }
839 
TestMalformedUTF8()840 void StringCaseTest::TestMalformedUTF8() {
841     // ticket #12639
842     IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
843     LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
844     if (errorCode.isFailure()) {
845         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
846         return;
847     }
848     char src[1] = { (char)0x85 };  // malformed UTF-8
849     char dest[3] = { 0, 0, 0 };
850     int32_t destLength;
851 #if !UCONFIG_NO_BREAK_ITERATION
852     destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
853     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
854         errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
855               errorCode.errorName(), (int)destLength, dest[0]);
856     }
857 #endif
858 
859     errorCode.reset();
860     dest[0] = 0;
861     destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
862     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
863         errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
864               errorCode.errorName(), (int)destLength, dest[0]);
865     }
866 
867     errorCode.reset();
868     dest[0] = 0;
869     destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
870     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
871         errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
872               errorCode.errorName(), (int)destLength, dest[0]);
873     }
874 
875     errorCode.reset();
876     dest[0] = 0;
877     destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
878     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
879         errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
880               errorCode.errorName(), (int)destLength, dest[0]);
881     }
882 }
883 
TestBufferOverflow()884 void StringCaseTest::TestBufferOverflow() {
885     // Ticket #12849, incorrect result from Title Case preflight operation,
886     // when buffer overflow error is expected.
887     IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
888     LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
889     if (errorCode.isFailure()) {
890         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
891         return;
892     }
893 
894     UnicodeString data("hello world");
895     int32_t result;
896 #if !UCONFIG_NO_BREAK_ITERATION
897     result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
898     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
899         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
900               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
901               __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
902     }
903 #endif
904     errorCode.reset();
905 
906     std::string data_utf8;
907     data.toUTF8String(data_utf8);
908 #if !UCONFIG_NO_BREAK_ITERATION
909     result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode);
910     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
911         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
912               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
913               __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
914     }
915 #endif
916     errorCode.reset();
917 }
918 
TestEdits()919 void StringCaseTest::TestEdits() {
920     IcuTestErrorCode errorCode(*this, "TestEdits");
921     Edits edits;
922     assertFalse("new Edits hasChanges", edits.hasChanges());
923     assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
924     assertEquals("new Edits", 0, edits.lengthDelta());
925     edits.addUnchanged(1);  // multiple unchanged ranges are combined
926     edits.addUnchanged(10000);  // too long, and they are split
927     edits.addReplace(0, 0);
928     edits.addUnchanged(2);
929     assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
930     assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
931     assertEquals("unchanged 10003", 0, edits.lengthDelta());
932     edits.addReplace(2, 1);  // multiple short equal-lengths edits are compressed
933     edits.addUnchanged(0);
934     edits.addReplace(2, 1);
935     edits.addReplace(2, 1);
936     edits.addReplace(0, 10);
937     edits.addReplace(100, 0);
938     edits.addReplace(3000, 4000);  // variable-length encoding
939     edits.addReplace(100000, 100000);
940     assertTrue("some edits hasChanges", edits.hasChanges());
941     assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
942     assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
943     UErrorCode outErrorCode = U_ZERO_ERROR;
944     assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
945 
946     static const EditChange coarseExpectedChanges[] = {
947             { FALSE, 10003, 10003 },
948             { TRUE, 103106, 104013 }
949     };
950     TestUtility::checkEditsIter(*this, u"coarse",
951             edits.getCoarseIterator(), edits.getCoarseIterator(),
952             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
953     TestUtility::checkEditsIter(*this, u"coarse changes",
954             edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
955             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
956 
957     static const EditChange fineExpectedChanges[] = {
958             { FALSE, 10003, 10003 },
959             { TRUE, 2, 1 },
960             { TRUE, 2, 1 },
961             { TRUE, 2, 1 },
962             { TRUE, 0, 10 },
963             { TRUE, 100, 0 },
964             { TRUE, 3000, 4000 },
965             { TRUE, 100000, 100000 }
966     };
967     TestUtility::checkEditsIter(*this, u"fine",
968             edits.getFineIterator(), edits.getFineIterator(),
969             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
970     TestUtility::checkEditsIter(*this, u"fine changes",
971             edits.getFineChangesIterator(), edits.getFineChangesIterator(),
972             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
973 
974     edits.reset();
975     assertFalse("reset hasChanges", edits.hasChanges());
976     assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
977     assertEquals("reset", 0, edits.lengthDelta());
978     Edits::Iterator ei = edits.getCoarseChangesIterator();
979     assertFalse("reset then iterator", ei.next(errorCode));
980 }
981 
TestCopyMoveEdits()982 void StringCaseTest::TestCopyMoveEdits() {
983     IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
984     // Exceed the stack array capacity.
985     Edits a;
986     for (int32_t i = 0; i < 250; ++i) {
987         a.addReplace(i % 10, (i % 10) + 1);
988     }
989     assertEquals("a: many edits, length delta", 250, a.lengthDelta());
990 
991     // copy
992     Edits b(a);
993     assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
994     assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
995     TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
996 
997     // assign
998     Edits c;
999     c.addUnchanged(99);
1000     c.addReplace(88, 77);
1001     c = b;
1002     assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
1003     assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
1004     TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
1005 
1006     // std::move trouble on these platforms.
1007     // See https://ssl.icu-project.org/trac/ticket/13393
1008 #if !UPRV_INCOMPLETE_CPP11_SUPPORT && !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1009     // move constructor empties object with heap array
1010     Edits d(std::move(a));
1011     assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
1012     assertFalse("a moved away: no more hasChanges", a.hasChanges());
1013     TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
1014     Edits empty;
1015     TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
1016 
1017     // move assignment empties object with heap array
1018     Edits e;
1019     e.addReplace(0, 1000);
1020     e = std::move(b);
1021     assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
1022     assertFalse("b moved away: no more hasChanges", b.hasChanges());
1023     TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
1024     TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
1025 
1026     // Edits::Iterator default constructor.
1027     Edits::Iterator iter;
1028     assertFalse("Edits::Iterator().next()", iter.next(errorCode));
1029     assertSuccess("Edits::Iterator().next()", errorCode);
1030     iter = e.getFineChangesIterator();
1031     assertTrue("iter.next()", iter.next(errorCode));
1032     assertSuccess("iter.next()", errorCode);
1033     assertTrue("iter.hasChange()", iter.hasChange());
1034     assertEquals("iter.newLength()", 1, iter.newLength());
1035 #endif
1036 }
1037 
TestEditsFindFwdBwd()1038 void StringCaseTest::TestEditsFindFwdBwd() {
1039     IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1040     // Some users need index mappings to be efficient when they are out of order.
1041     // The most interesting failure case for this test is it taking a very long time.
1042     Edits e;
1043     constexpr int32_t N = 200000;
1044     for (int32_t i = 0; i < N; ++i) {
1045         e.addUnchanged(1);
1046         e.addReplace(3, 1);
1047     }
1048     Edits::Iterator iter = e.getFineIterator();
1049     for (int32_t i = 0; i <= N; i += 2) {
1050         assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1051         assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1052     }
1053     for (int32_t i = N; i >= 0; i -= 2) {
1054         assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1055         assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1056     }
1057 }
1058 
TestMergeEdits()1059 void StringCaseTest::TestMergeEdits() {
1060     // For debugging, set -v to see matching edits up to a failure.
1061     IcuTestErrorCode errorCode(*this, "TestMergeEdits");
1062     Edits ab, bc, ac, expected_ac;
1063 
1064     // Simple: Two parallel non-changes.
1065     ab.addUnchanged(2);
1066     bc.addUnchanged(2);
1067     expected_ac.addUnchanged(2);
1068 
1069     // Simple: Two aligned changes.
1070     ab.addReplace(3, 2);
1071     bc.addReplace(2, 1);
1072     expected_ac.addReplace(3, 1);
1073 
1074     // Unequal non-changes.
1075     ab.addUnchanged(5);
1076     bc.addUnchanged(3);
1077     expected_ac.addUnchanged(3);
1078     // ab ahead by 2
1079 
1080     // Overlapping changes accumulate until they share a boundary.
1081     ab.addReplace(4, 3);
1082     bc.addReplace(3, 2);
1083     ab.addReplace(4, 3);
1084     bc.addReplace(3, 2);
1085     ab.addReplace(4, 3);
1086     bc.addReplace(3, 2);
1087     bc.addUnchanged(4);
1088     expected_ac.addReplace(14, 8);
1089     // bc ahead by 2
1090 
1091     // Balance out intermediate-string lengths.
1092     ab.addUnchanged(2);
1093     expected_ac.addUnchanged(2);
1094 
1095     // Insert something and delete it: Should disappear.
1096     ab.addReplace(0, 5);
1097     ab.addReplace(0, 2);
1098     bc.addReplace(7, 0);
1099 
1100     // Parallel change to make a new boundary.
1101     ab.addReplace(1, 2);
1102     bc.addReplace(2, 3);
1103     expected_ac.addReplace(1, 3);
1104 
1105     // Multiple ab deletions should remain separate at the boundary.
1106     ab.addReplace(1, 0);
1107     ab.addReplace(2, 0);
1108     ab.addReplace(3, 0);
1109     expected_ac.addReplace(1, 0);
1110     expected_ac.addReplace(2, 0);
1111     expected_ac.addReplace(3, 0);
1112 
1113     // Unequal non-changes can be split for another boundary.
1114     ab.addUnchanged(2);
1115     bc.addUnchanged(1);
1116     expected_ac.addUnchanged(1);
1117     // ab ahead by 1
1118 
1119     // Multiple bc insertions should create a boundary and remain separate.
1120     bc.addReplace(0, 4);
1121     bc.addReplace(0, 5);
1122     bc.addReplace(0, 6);
1123     expected_ac.addReplace(0, 4);
1124     expected_ac.addReplace(0, 5);
1125     expected_ac.addReplace(0, 6);
1126     // ab ahead by 1
1127 
1128     // Multiple ab deletions in the middle of a bc change are merged.
1129     bc.addReplace(2, 2);
1130     // bc ahead by 1
1131     ab.addReplace(1, 0);
1132     ab.addReplace(2, 0);
1133     ab.addReplace(3, 0);
1134     ab.addReplace(4, 1);
1135     expected_ac.addReplace(11, 2);
1136 
1137     // Multiple bc insertions in the middle of an ab change are merged.
1138     ab.addReplace(5, 6);
1139     bc.addReplace(3, 3);
1140     // ab ahead by 3
1141     bc.addReplace(0, 4);
1142     bc.addReplace(0, 5);
1143     bc.addReplace(0, 6);
1144     bc.addReplace(3, 7);
1145     expected_ac.addReplace(5, 25);
1146 
1147     // Delete around a deletion.
1148     ab.addReplace(4, 4);
1149     ab.addReplace(3, 0);
1150     ab.addUnchanged(2);
1151     bc.addReplace(2, 2);
1152     bc.addReplace(4, 0);
1153     expected_ac.addReplace(9, 2);
1154 
1155     // Insert into an insertion.
1156     ab.addReplace(0, 2);
1157     bc.addReplace(1, 1);
1158     bc.addReplace(0, 8);
1159     bc.addUnchanged(4);
1160     expected_ac.addReplace(0, 10);
1161     // bc ahead by 3
1162 
1163     // Balance out intermediate-string lengths.
1164     ab.addUnchanged(3);
1165     expected_ac.addUnchanged(3);
1166 
1167     // Deletions meet insertions.
1168     // Output order is arbitrary in principle, but we expect insertions first
1169     // and want to keep it that way.
1170     ab.addReplace(2, 0);
1171     ab.addReplace(4, 0);
1172     ab.addReplace(6, 0);
1173     bc.addReplace(0, 1);
1174     bc.addReplace(0, 3);
1175     bc.addReplace(0, 5);
1176     expected_ac.addReplace(0, 1);
1177     expected_ac.addReplace(0, 3);
1178     expected_ac.addReplace(0, 5);
1179     expected_ac.addReplace(2, 0);
1180     expected_ac.addReplace(4, 0);
1181     expected_ac.addReplace(6, 0);
1182 
1183     // End with a non-change, so that further edits are never reordered.
1184     ab.addUnchanged(1);
1185     bc.addUnchanged(1);
1186     expected_ac.addUnchanged(1);
1187 
1188     ac.mergeAndAppend(ab, bc, errorCode);
1189     assertSuccess("ab+bc", errorCode);
1190     if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
1191         return;
1192     }
1193 
1194     // Append more Edits.
1195     Edits ab2, bc2;
1196     ab2.addUnchanged(5);
1197     bc2.addReplace(1, 2);
1198     bc2.addUnchanged(4);
1199     expected_ac.addReplace(1, 2);
1200     expected_ac.addUnchanged(4);
1201     ac.mergeAndAppend(ab2, bc2, errorCode);
1202     assertSuccess("ab2+bc2", errorCode);
1203     if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
1204         return;
1205     }
1206 
1207     // Append empty edits.
1208     Edits empty;
1209     ac.mergeAndAppend(empty, empty, errorCode);
1210     assertSuccess("empty+empty", errorCode);
1211     if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
1212         return;
1213     }
1214 
1215     // Error: Append more edits with mismatched intermediate-string lengths.
1216     Edits mismatch;
1217     mismatch.addReplace(1, 1);
1218     ac.mergeAndAppend(ab2, mismatch, errorCode);
1219     assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1220     errorCode.reset();
1221     ac.mergeAndAppend(mismatch, bc2, errorCode);
1222     assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1223     errorCode.reset();
1224 }
1225 
TestCaseMapWithEdits()1226 void StringCaseTest::TestCaseMapWithEdits() {
1227     IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1228     UChar dest[20];
1229     Edits edits;
1230 
1231     int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1232                                       u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1233     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1234     static const EditChange lowerExpectedChanges[] = {
1235             { TRUE, 1, 1 },
1236             { FALSE, 4, 4 },
1237             { TRUE, 1, 1 },
1238             { FALSE, 2, 2 }
1239     };
1240     TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1241             edits.getFineIterator(), edits.getFineIterator(),
1242             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1243             TRUE, errorCode);
1244 
1245     edits.reset();
1246     length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1247                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1248     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1249     static const EditChange upperExpectedChanges[] = {
1250             { FALSE, 1, 1 },
1251             { TRUE, 1, 1 },
1252             { TRUE, 1, 1 },
1253             { TRUE, 1, 1 },
1254             { TRUE, 1, 1 },
1255             { TRUE, 1, 1 }
1256     };
1257     TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1258             edits.getFineIterator(), edits.getFineIterator(),
1259             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1260             TRUE, errorCode);
1261 
1262     edits.reset();
1263 
1264 #if !UCONFIG_NO_BREAK_ITERATION
1265     length = CaseMap::toTitle("nl",
1266                               U_OMIT_UNCHANGED_TEXT |
1267                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1268                               U_TITLECASE_NO_LOWERCASE,
1269                               nullptr, u"IjssEL IglOo", 12,
1270                               dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1271     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1272     static const EditChange titleExpectedChanges[] = {
1273             { FALSE, 1, 1 },
1274             { TRUE, 1, 1 },
1275             { FALSE, 10, 10 }
1276     };
1277     TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1278             edits.getFineIterator(), edits.getFineIterator(),
1279             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1280             TRUE, errorCode);
1281 #endif
1282 
1283     // No explicit nor automatic edits.reset(). Edits should be appended.
1284     length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1285                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1286     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1287     static const EditChange foldExpectedChanges[] = {
1288 #if !UCONFIG_NO_BREAK_ITERATION
1289             // From titlecasing.
1290             { FALSE, 1, 1 },
1291             { TRUE, 1, 1 },
1292             { FALSE, 10, 10 },
1293 #endif
1294             // From case folding.
1295             { TRUE, 1, 1 },
1296             { TRUE, 1, 2 },
1297             { FALSE, 3, 3 },
1298             { TRUE, 1, 1 },
1299             { FALSE, 2, 2 }
1300     };
1301     TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1302             edits.getFineIterator(), edits.getFineIterator(),
1303             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1304             TRUE, errorCode);
1305 }
1306 
TestCaseMapUTF8WithEdits()1307 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1308     IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1309     char dest[50];
1310     Edits edits;
1311 
1312     int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1313                                           u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1314     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1315                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1316     static const EditChange lowerExpectedChanges[] = {
1317             { TRUE, 1, 2 },
1318             { FALSE, 4, 4 },
1319             { TRUE, 1, 1 },
1320             { FALSE, 2, 2 }
1321     };
1322     TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1323             edits.getFineIterator(), edits.getFineIterator(),
1324             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1325             TRUE, errorCode);
1326 
1327     edits.reset();
1328     length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1329                                   u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1330     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1331                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1332     static const EditChange upperExpectedChanges[] = {
1333             { FALSE, 2, 2 },
1334             { TRUE, 2, 2 },
1335             { TRUE, 2, 2 },
1336             { TRUE, 2, 2 },
1337             { TRUE, 2, 2 },
1338             { TRUE, 2, 2 }
1339     };
1340     TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1341             edits.getFineIterator(), edits.getFineIterator(),
1342             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1343             TRUE, errorCode);
1344 
1345     edits.reset();
1346 #if !UCONFIG_NO_BREAK_ITERATION
1347     length = CaseMap::utf8ToTitle("nl",
1348                                   U_OMIT_UNCHANGED_TEXT |
1349                                   U_TITLECASE_NO_BREAK_ADJUSTMENT |
1350                                   U_TITLECASE_NO_LOWERCASE,
1351                                   nullptr, u8"IjssEL IglOo", 12,
1352                                   dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1353     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1354                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1355     static const EditChange titleExpectedChanges[] = {
1356             { FALSE, 1, 1 },
1357             { TRUE, 1, 1 },
1358             { FALSE, 10, 10 }
1359     };
1360     TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1361             edits.getFineIterator(), edits.getFineIterator(),
1362             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1363             TRUE, errorCode);
1364 #endif
1365 
1366     // No explicit nor automatic edits.reset(). Edits should be appended.
1367     length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1368                                    U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1369                                u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1370     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1371                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1372     static const EditChange foldExpectedChanges[] = {
1373 #if !UCONFIG_NO_BREAK_ITERATION
1374             // From titlecasing.
1375             { FALSE, 1, 1 },
1376             { TRUE, 1, 1 },
1377             { FALSE, 10, 10 },
1378 #endif
1379             // From case folding.
1380             { TRUE, 1, 2 },
1381             { TRUE, 2, 2 },
1382             { FALSE, 3, 3 },
1383             { TRUE, 1, 1 },
1384             { FALSE, 2, 2 }
1385     };
1386     TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1387             edits.getFineIterator(), edits.getFineIterator(),
1388             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1389             TRUE, errorCode);
1390 }
1391 
TestCaseMapToString()1392 void StringCaseTest::TestCaseMapToString() {
1393     // This test function name is parallel with one in UCharacterCaseTest.java.
1394     // It is a bit of a misnomer until we have CaseMap API that writes to
1395     // a UnicodeString, at which point we should change this code here.
1396     IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1397     UChar dest[20];
1398 
1399     // Omit unchanged text.
1400     int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1401                                       u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1402     assertEquals(u"toLower(IstanBul)",
1403                  UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1404     length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1405                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1406     assertEquals(u"toUpper(Πατάτα)",
1407                  UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1408 #if !UCONFIG_NO_BREAK_ITERATION
1409     length = CaseMap::toTitle("nl",
1410                               U_OMIT_UNCHANGED_TEXT |
1411                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1412                               U_TITLECASE_NO_LOWERCASE,
1413                               nullptr, u"IjssEL IglOo", 12,
1414                               dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1415     assertEquals(u"toTitle(IjssEL IglOo)",
1416                  UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1417 #endif
1418     length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1419                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1420     assertEquals(u"foldCase(IßtanBul)",
1421                  UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1422 
1423     // Return the whole result string.
1424     length = CaseMap::toLower("tr", 0,
1425                               u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1426     assertEquals(u"toLower(IstanBul)",
1427                  UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
1428     length = CaseMap::toUpper("el", 0,
1429                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1430     assertEquals(u"toUpper(Πατάτα)",
1431                  UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1432 #if !UCONFIG_NO_BREAK_ITERATION
1433     length = CaseMap::toTitle("nl",
1434                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1435                               U_TITLECASE_NO_LOWERCASE,
1436                               nullptr, u"IjssEL IglOo", 12,
1437                               dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1438     assertEquals(u"toTitle(IjssEL IglOo)",
1439                  UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
1440 #endif
1441     length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1442                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1443     assertEquals(u"foldCase(IßtanBul)",
1444                  UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
1445 }
1446 
TestCaseMapUTF8ToString()1447 void StringCaseTest::TestCaseMapUTF8ToString() {
1448     IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1449     std::string dest;
1450     StringByteSink<std::string> sink(&dest);
1451 
1452     // Omit unchanged text.
1453     CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1454     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1455     dest.clear();
1456     CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1457     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1458                  UnicodeString::fromUTF8(dest));
1459 #if !UCONFIG_NO_BREAK_ITERATION
1460     dest.clear();
1461     CaseMap::utf8ToTitle(
1462         "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1463         nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1464     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1465                  UnicodeString::fromUTF8(dest));
1466 #endif
1467     dest.clear();
1468     CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1469                       u8"IßtanBul", sink, nullptr, errorCode);
1470     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1471                  UnicodeString::fromUTF8(dest));
1472 
1473     // Return the whole result string.
1474     dest.clear();
1475     CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1476     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1477                  UnicodeString::fromUTF8(dest));
1478     dest.clear();
1479     CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1480     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1481                  UnicodeString::fromUTF8(dest));
1482 #if !UCONFIG_NO_BREAK_ITERATION
1483     dest.clear();
1484     CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1485                          nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1486     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1487                  UnicodeString::fromUTF8(dest));
1488 #endif
1489     dest.clear();
1490     CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1491     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1492                  UnicodeString::fromUTF8(dest));
1493 }
1494 
TestLongUnicodeString()1495 void StringCaseTest::TestLongUnicodeString() {
1496     // Code coverage for UnicodeString case mapping code handling
1497     // long strings or many changes in a string.
1498     UnicodeString s(TRUE,
1499         (const UChar *)
1500         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1501         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1502         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1503         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1504         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1505         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1506     UnicodeString expected(TRUE,
1507         (const UChar *)
1508         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1509         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1510         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1511         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1512         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1513         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1514     s.toUpper(Locale::getRoot());
1515     assertEquals("string length 306", expected, s);
1516 }
1517 
1518 #if !UCONFIG_NO_BREAK_ITERATION
TestBug13127()1519 void StringCaseTest::TestBug13127() {
1520     // Test case crashed when the bug was present.
1521     const char16_t *s16 = u"日本語";
1522     UnicodeString s(TRUE, s16, -1);
1523     s.toTitle(0, Locale::getEnglish());
1524 }
1525 
TestInPlaceTitle()1526 void StringCaseTest::TestInPlaceTitle() {
1527     // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1528     IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1529     char16_t s[32] = u"ß ß ß日本語 abcdef";
1530     const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1531     int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1532     assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1533     assertEquals("u_strToTitle(in-place)", expected, s);
1534 }
1535 #endif
1536