• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2005-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /************************************************************************
9 *   Tests for the UText and UTextIterator text abstraction classes
10 *
11 ************************************************************************/
12 
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include "unicode/utypes.h"
17 #include "unicode/utext.h"
18 #include "unicode/utf8.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchriter.h"
21 #include "cmemory.h"
22 #include "cstr.h"
23 #include "utxttest.h"
24 
25 static UBool  gFailed = FALSE;
26 static int    gTestNum = 0;
27 
28 // Forward decl
29 UText *openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
30 
31 #define TEST_ASSERT(x) \
32 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
33                      gFailed = TRUE;\
34    }}
35 
36 
37 #define TEST_SUCCESS(status) \
38 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
39        gTestNum, __FILE__, __LINE__, u_errorName(status)); \
40        gFailed = TRUE;\
41    }}
42 
UTextTest()43 UTextTest::UTextTest() {
44 }
45 
~UTextTest()46 UTextTest::~UTextTest() {
47 }
48 
49 
50 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)51 UTextTest::runIndexedTest(int32_t index, UBool exec,
52                           const char* &name, char* /*par*/) {
53     switch (index) {
54         case 0: name = "TextTest";
55             if (exec) TextTest();    break;
56         case 1: name = "ErrorTest";
57             if (exec) ErrorTest();   break;
58         case 2: name = "FreezeTest";
59             if (exec) FreezeTest();  break;
60         case 3: name = "Ticket5560";
61             if (exec) Ticket5560();  break;
62         case 4: name = "Ticket6847";
63             if (exec) Ticket6847();  break;
64         case 5: name = "Ticket10562";
65             if (exec) Ticket10562();  break;
66         case 6: name = "Ticket10983";
67             if (exec) Ticket10983();  break;
68         case 7: name = "Ticket12130";
69             if (exec) Ticket12130(); break;
70         default: name = "";          break;
71     }
72 }
73 
//
// Quick and dirty random number generator.
//   (The library generator is not used, so that results are portable.)
//   Each call advances m_seed and returns a value in the range 0 ... 32767.
//
static uint32_t m_seed = 1;
static uint32_t m_rand()
{
    const uint32_t kMultiplier = 1103515245;
    const uint32_t kIncrement  = 12345;

    m_seed = m_seed * kMultiplier + kIncrement;

    // Discard the low 16 bits of the state, then keep the next 15 bits.
    return (m_seed >> 16) & 0x7fff;
}
83 
84 
85 //
86 //   TextTest()
87 //
88 //       Top Level function for UText testing.
89 //       Specifies the strings to be tested, with the acutal testing itself
90 //       being carried out in another function, TestString().
91 //
TextTest()92 void  UTextTest::TextTest() {
93     int32_t i, j;
94 
95     TestString("abcd\\U00010001xyz");
96     TestString("");
97 
98     // Supplementary chars at start or end
99     TestString("\\U00010001");
100     TestString("abc\\U00010001");
101     TestString("\\U00010001abc");
102 
103     // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
104     UnicodeString s;
105     for (i=1; i<60; i++) {
106         s.truncate(0);
107         for (j=0; j<i; j++) {
108             if (j+0x30 == 0x5c) {
109                 // backslash.  Needs to be escaped
110                 s.append((UChar)0x5c);
111             }
112             s.append(UChar(j+0x30));
113         }
114         TestString(s);
115     }
116 
117    // Test strings with odd-aligned supplementary chars,
118    //    looking for glitches at buffer boundaries
119     for (i=1; i<60; i++) {
120         s.truncate(0);
121         s.append((UChar)0x41);
122         for (j=0; j<i; j++) {
123             s.append(UChar32(j+0x11000));
124         }
125         TestString(s);
126     }
127 
128     // String of chars of randomly varying size in utf-8 representation.
129     //   Exercise the mapping, and the varying sized buffer.
130     //
131     s.truncate(0);
132     UChar32  c1 = 0;
133     UChar32  c2 = 0x100;
134     UChar32  c3 = 0xa000;
135     UChar32  c4 = 0x11000;
136     for (i=0; i<1000; i++) {
137         int len8 = m_rand()%4 + 1;
138         switch (len8) {
139             case 1:
140                 c1 = (c1+1)%0x80;
141                 // don't put 0 into string (0 terminated strings for some tests)
142                 // don't put '\', will cause unescape() to fail.
143                 if (c1==0x5c || c1==0) {
144                     c1++;
145                 }
146                 s.append(c1);
147                 break;
148             case 2:
149                 s.append(c2++);
150                 break;
151             case 3:
152                 s.append(c3++);
153                 break;
154             case 4:
155                 s.append(c4++);
156                 break;
157         }
158     }
159     TestString(s);
160 }
161 
162 
163 //
164 //  TestString()     Run a suite of UText tests on a string.
165 //                   The test string is unescaped before use.
166 //
TestString(const UnicodeString & s)167 void UTextTest::TestString(const UnicodeString &s) {
168     int32_t       i;
169     int32_t       j;
170     UChar32       c;
171     int32_t       cpCount = 0;
172     UErrorCode    status  = U_ZERO_ERROR;
173     UText        *ut      = NULL;
174     int32_t       saLen;
175 
176     UnicodeString sa = s.unescape();
177     saLen = sa.length();
178 
179     //
180     // Build up a mapping between code points and UTF-16 code unit indexes.
181     //
182     m *cpMap = new m[sa.length() + 1];
183     j = 0;
184     for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
185         c = sa.char32At(i);
186         cpMap[j].nativeIdx = i;
187         cpMap[j].cp = c;
188         j++;
189         cpCount++;
190     }
191     cpMap[j].nativeIdx = i;   // position following the last char in utf-16 string.
192 
193 
194     // UChar * test, null terminated
195     status = U_ZERO_ERROR;
196     UChar *buf = new UChar[saLen+1];
197     sa.extract(buf, saLen+1, status);
198     TEST_SUCCESS(status);
199     ut = utext_openUChars(NULL, buf, -1, &status);
200     TEST_SUCCESS(status);
201     TestAccess(sa, ut, cpCount, cpMap);
202     utext_close(ut);
203     delete [] buf;
204 
205     // UChar * test, with length
206     status = U_ZERO_ERROR;
207     buf = new UChar[saLen+1];
208     sa.extract(buf, saLen+1, status);
209     TEST_SUCCESS(status);
210     ut = utext_openUChars(NULL, buf, saLen, &status);
211     TEST_SUCCESS(status);
212     TestAccess(sa, ut, cpCount, cpMap);
213     utext_close(ut);
214     delete [] buf;
215 
216 
217     // UnicodeString test
218     status = U_ZERO_ERROR;
219     ut = utext_openUnicodeString(NULL, &sa, &status);
220     TEST_SUCCESS(status);
221     TestAccess(sa, ut, cpCount, cpMap);
222     TestCMR(sa, ut, cpCount, cpMap, cpMap);
223     utext_close(ut);
224 
225 
226     // Const UnicodeString test
227     status = U_ZERO_ERROR;
228     ut = utext_openConstUnicodeString(NULL, &sa, &status);
229     TEST_SUCCESS(status);
230     TestAccess(sa, ut, cpCount, cpMap);
231     utext_close(ut);
232 
233 
234     // Replaceable test.  (UnicodeString inherits Replaceable)
235     status = U_ZERO_ERROR;
236     ut = utext_openReplaceable(NULL, &sa, &status);
237     TEST_SUCCESS(status);
238     TestAccess(sa, ut, cpCount, cpMap);
239     TestCMR(sa, ut, cpCount, cpMap, cpMap);
240     utext_close(ut);
241 
242     // Character Iterator Tests
243     status = U_ZERO_ERROR;
244     const UChar *cbuf = sa.getBuffer();
245     CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
246     TEST_SUCCESS(status);
247     ut = utext_openCharacterIterator(NULL, ci, &status);
248     TEST_SUCCESS(status);
249     TestAccess(sa, ut, cpCount, cpMap);
250     utext_close(ut);
251     delete ci;
252 
253 
254     // Fragmented UnicodeString  (Chunk size of one)
255     //
256     status = U_ZERO_ERROR;
257     ut = openFragmentedUnicodeString(NULL, &sa, &status);
258     TEST_SUCCESS(status);
259     TestAccess(sa, ut, cpCount, cpMap);
260     utext_close(ut);
261 
262     //
263     // UTF-8 test
264     //
265 
266     // Convert the test string from UnicodeString to (char *) in utf-8 format
267     int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
268     char *u8String = new char[u8Len + 1];
269     sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
270 
271     // Build up the map of code point indices in the utf-8 string
272     m * u8Map = new m[sa.length() + 1];
273     i = 0;   // native utf-8 index
274     for (j=0; j<cpCount ; j++) {  // code point number
275         u8Map[j].nativeIdx = i;
276         U8_NEXT(u8String, i, u8Len, c)
277         u8Map[j].cp = c;
278     }
279     u8Map[cpCount].nativeIdx = u8Len;   // position following the last char in utf-8 string.
280 
281     // Do the test itself
282     status = U_ZERO_ERROR;
283     ut = utext_openUTF8(NULL, u8String, -1, &status);
284     TEST_SUCCESS(status);
285     TestAccess(sa, ut, cpCount, u8Map);
286     utext_close(ut);
287 
288 
289 
290     delete []cpMap;
291     delete []u8Map;
292     delete []u8String;
293 }
294 
295 //  TestCMR   test Copy, Move and Replace operations.
296 //              us         UnicodeString containing the test text.
297 //              ut         UText containing the same test text.
298 //              cpCount    number of code points in the test text.
299 //              nativeMap  Mapping from code points to native indexes for the UText.
300 //              u16Map     Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
301 //
302 //     This function runs a whole series of opertions on each incoming UText.
303 //     The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
304 //
TestCMR(const UnicodeString & us,UText * ut,int cpCount,m * nativeMap,m * u16Map)305 void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
306     TEST_ASSERT(utext_isWritable(ut) == TRUE);
307 
308     int  srcLengthType;       // Loop variables for selecting the postion and length
309     int  srcPosType;          //   of the block to operate on within the source text.
310     int  destPosType;
311 
312     int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
313     int  srcLength = 0;       //   a specific test.
314 
315     int  destIndex = 0;       // Code point index of the destination for a copy/move test.
316 
317     int32_t  nativeStart = 0; // Native unit indexes for a test.
318     int32_t  nativeLimit = 0;
319     int32_t  nativeDest  = 0;
320 
321     int32_t  u16Start    = 0; // UTF-16 indexes for a test.
322     int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
323     int32_t  u16Dest     = 0;
324 
325     // Iterate over a whole series of source index, length and a target indexes.
326     // This is done with code point indexes; these will be later translated to native
327     //   indexes using the cpMap.
328     for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
329         switch (srcLengthType) {
330             case 1: srcLength = 1; break;
331             case 2: srcLength = 5; break;
332             case 3: srcLength = cpCount / 3;
333         }
334         for (srcPosType=1; srcPosType<=5; srcPosType++) {
335             switch (srcPosType) {
336                 case 1: srcIndex = 0; break;
337                 case 2: srcIndex = 1; break;
338                 case 3: srcIndex = cpCount - srcLength; break;
339                 case 4: srcIndex = cpCount - srcLength - 1; break;
340                 case 5: srcIndex = cpCount / 2; break;
341             }
342             if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
343                 // filter out bogus test cases -
344                 //   those with a source range that falls of an edge of the string.
345                 continue;
346             }
347 
348             //
349             // Copy and move tests.
350             //   iterate over a variety of destination positions.
351             //
352             for (destPosType=1; destPosType<=4; destPosType++) {
353                 switch (destPosType) {
354                     case 1: destIndex = 0; break;
355                     case 2: destIndex = 1; break;
356                     case 3: destIndex = srcIndex - 1; break;
357                     case 4: destIndex = srcIndex + srcLength + 1; break;
358                     case 5: destIndex = cpCount-1; break;
359                     case 6: destIndex = cpCount; break;
360                 }
361                 if (destIndex<0 || destIndex>cpCount) {
362                     // filter out bogus test cases.
363                     continue;
364                 }
365 
366                 nativeStart = nativeMap[srcIndex].nativeIdx;
367                 nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
368                 nativeDest  = nativeMap[destIndex].nativeIdx;
369 
370                 u16Start    = u16Map[srcIndex].nativeIdx;
371                 u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;
372                 u16Dest     = u16Map[destIndex].nativeIdx;
373 
374                 gFailed = FALSE;
375                 TestCopyMove(us, ut, FALSE,
376                     nativeStart, nativeLimit, nativeDest,
377                     u16Start, u16Limit, u16Dest);
378 
379                 TestCopyMove(us, ut, TRUE,
380                     nativeStart, nativeLimit, nativeDest,
381                     u16Start, u16Limit, u16Dest);
382 
383                 if (gFailed) {
384                     return;
385                 }
386             }
387 
388             //
389             //  Replace tests.
390             //
391             UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
392             for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
393                 UnicodeString repStr(fullRepString, 0, replStrLen);
394                 TestReplace(us, ut,
395                     nativeStart, nativeLimit,
396                     u16Start, u16Limit,
397                     repStr);
398                 if (gFailed) {
399                     return;
400                 }
401             }
402 
403         }
404     }
405 
406 }
407 
408 //
409 //   TestCopyMove    run a single test case for utext_copy.
410 //                   Test cases are created in TestCMR and dispatched here for execution.
411 //
TestCopyMove(const UnicodeString & us,UText * ut,UBool move,int32_t nativeStart,int32_t nativeLimit,int32_t nativeDest,int32_t u16Start,int32_t u16Limit,int32_t u16Dest)412 void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
413                     int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
414                     int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
415 {
416     UErrorCode      status   = U_ZERO_ERROR;
417     UText          *targetUT = NULL;
418     gTestNum++;
419     gFailed = FALSE;
420 
421     //
422     //  clone the UText.  The test will be run in the cloned copy
423     //  so that we don't alter the original.
424     //
425     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
426     TEST_SUCCESS(status);
427     UnicodeString targetUS(us);    // And copy the reference string.
428 
429     // do the test operation first in the reference
430     targetUS.copy(u16Start, u16Limit, u16Dest);
431     if (move) {
432         // delete out the source range.
433         if (u16Limit < u16Dest) {
434             targetUS.removeBetween(u16Start, u16Limit);
435         } else {
436             int32_t amtCopied = u16Limit - u16Start;
437             targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
438         }
439     }
440 
441     // Do the same operation in the UText under test
442     utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
443     if (nativeDest > nativeStart && nativeDest < nativeLimit) {
444         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
445     } else {
446         TEST_SUCCESS(status);
447 
448         // Compare the results of the two parallel tests
449         int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
450         int64_t  uti = 0;    // UText position, native index.
451         int32_t  cpi;        // char32 position (code point index)
452         UChar32  usc;        // code point from Unicode String
453         UChar32  utc;        // code point from UText
454         utext_setNativeIndex(targetUT, 0);
455         for (cpi=0; ; cpi++) {
456             usc = targetUS.char32At(usi);
457             utc = utext_next32(targetUT);
458             if (utc < 0) {
459                 break;
460             }
461             TEST_ASSERT(uti == usi);
462             TEST_ASSERT(utc == usc);
463             usi = targetUS.moveIndex32(usi, 1);
464             uti = utext_getNativeIndex(targetUT);
465             if (gFailed) {
466                 goto cleanupAndReturn;
467             }
468         }
469         int64_t expectedNativeLength = utext_nativeLength(ut);
470         if (move == FALSE) {
471             expectedNativeLength += nativeLimit - nativeStart;
472         }
473         uti = utext_getNativeIndex(targetUT);
474         TEST_ASSERT(uti == expectedNativeLength);
475     }
476 
477 cleanupAndReturn:
478     utext_close(targetUT);
479 }
480 
481 
482 //
483 //  TestReplace   Test a single Replace operation.
484 //
TestReplace(const UnicodeString & us,UText * ut,int32_t nativeStart,int32_t nativeLimit,int32_t u16Start,int32_t u16Limit,const UnicodeString & repStr)485 void UTextTest::TestReplace(
486             const UnicodeString &us,     // reference UnicodeString in which to do the replace
487             UText         *ut,                // UnicodeText object under test.
488             int32_t       nativeStart,        // Range to be replaced, in UText native units.
489             int32_t       nativeLimit,
490             int32_t       u16Start,           // Range to be replaced, in UTF-16 units
491             int32_t       u16Limit,           //    for use in the reference UnicodeString.
492             const UnicodeString &repStr)      // The replacement string
493 {
494     UErrorCode      status   = U_ZERO_ERROR;
495     UText          *targetUT = NULL;
496     gTestNum++;
497     gFailed = FALSE;
498 
499     //
500     //  clone the target UText.  The test will be run in the cloned copy
501     //  so that we don't alter the original.
502     //
503     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
504     TEST_SUCCESS(status);
505     UnicodeString targetUS(us);    // And copy the reference string.
506 
507     //
508     // Do the replace operation in the Unicode String, to
509     //   produce a reference result.
510     //
511     targetUS.replace(u16Start, u16Limit-u16Start, repStr);
512 
513     //
514     // Do the replace on the UText under test
515     //
516     const UChar *rs = repStr.getBuffer();
517     int32_t  rsLen = repStr.length();
518     int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
519     int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
520     TEST_ASSERT(actualDelta == expectedDelta);
521 
522     //
523     // Compare the results
524     //
525     int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
526     int64_t  uti = 0;    // UText position, native index.
527     int32_t  cpi;        // char32 position (code point index)
528     UChar32  usc;        // code point from Unicode String
529     UChar32  utc;        // code point from UText
530     int64_t  expectedNativeLength = 0;
531     utext_setNativeIndex(targetUT, 0);
532     for (cpi=0; ; cpi++) {
533         usc = targetUS.char32At(usi);
534         utc = utext_next32(targetUT);
535         if (utc < 0) {
536             break;
537         }
538         TEST_ASSERT(uti == usi);
539         TEST_ASSERT(utc == usc);
540         usi = targetUS.moveIndex32(usi, 1);
541         uti = utext_getNativeIndex(targetUT);
542         if (gFailed) {
543             goto cleanupAndReturn;
544         }
545     }
546     expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
547     uti = utext_getNativeIndex(targetUT);
548     TEST_ASSERT(uti == expectedNativeLength);
549 
550 cleanupAndReturn:
551     utext_close(targetUT);
552 }
553 
554 //
555 //  TestAccess      Test the read only access functions on a UText, including cloning.
556 //                  The text is accessed in a variety of ways, and compared with
557 //                  the reference UnicodeString.
558 //
TestAccess(const UnicodeString & us,UText * ut,int cpCount,m * cpMap)559 void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
560     // Run the standard tests on the caller-supplied UText.
561     TestAccessNoClone(us, ut, cpCount, cpMap);
562 
563     // Re-run tests on a shallow clone.
564     utext_setNativeIndex(ut, 0);
565     UErrorCode status = U_ZERO_ERROR;
566     UText *shallowClone = utext_clone(NULL, ut, FALSE /*deep*/, FALSE /*readOnly*/, &status);
567     TEST_SUCCESS(status);
568     TestAccessNoClone(us, shallowClone, cpCount, cpMap);
569 
570     //
571     // Rerun again on a deep clone.
572     // Note that text providers are not required to provide deep cloning,
573     //   so unsupported errors are ignored.
574     //
575     status = U_ZERO_ERROR;
576     utext_setNativeIndex(shallowClone, 0);
577     UText *deepClone = utext_clone(NULL, shallowClone, TRUE, FALSE, &status);
578     utext_close(shallowClone);
579     if (status != U_UNSUPPORTED_ERROR) {
580         TEST_SUCCESS(status);
581         TestAccessNoClone(us, deepClone, cpCount, cpMap);
582     }
583     utext_close(deepClone);
584 }
585 
586 
587 //
588 //  TestAccessNoClone()    Test the read only access functions on a UText.
589 //                         The text is accessed in a variety of ways, and compared with
590 //                         the reference UnicodeString.
591 //
TestAccessNoClone(const UnicodeString & us,UText * ut,int cpCount,m * cpMap)592 void UTextTest::TestAccessNoClone(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
593     UErrorCode  status = U_ZERO_ERROR;
594     gTestNum++;
595 
596     //
597     //  Check the length from the UText
598     //
599     int64_t expectedLen = cpMap[cpCount].nativeIdx;
600     int64_t utlen = utext_nativeLength(ut);
601     TEST_ASSERT(expectedLen == utlen);
602 
603     //
604     //  Iterate forwards, verify that we get the correct code points
605     //   at the correct native offsets.
606     //
607     int         i = 0;
608     int64_t     index;
609     int64_t     expectedIndex = 0;
610     int64_t     foundIndex = 0;
611     UChar32     expectedC;
612     UChar32     foundC;
613     int64_t     len;
614 
615     for (i=0; i<cpCount; i++) {
616         expectedIndex = cpMap[i].nativeIdx;
617         foundIndex    = utext_getNativeIndex(ut);
618         TEST_ASSERT(expectedIndex == foundIndex);
619         expectedC     = cpMap[i].cp;
620         foundC        = utext_next32(ut);
621         TEST_ASSERT(expectedC == foundC);
622         foundIndex    = utext_getPreviousNativeIndex(ut);
623         TEST_ASSERT(expectedIndex == foundIndex);
624         if (gFailed) {
625             return;
626         }
627     }
628     foundC = utext_next32(ut);
629     TEST_ASSERT(foundC == U_SENTINEL);
630 
631     // Repeat above, using macros
632     utext_setNativeIndex(ut, 0);
633     for (i=0; i<cpCount; i++) {
634         expectedIndex = cpMap[i].nativeIdx;
635         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
636         TEST_ASSERT(expectedIndex == foundIndex);
637         expectedC     = cpMap[i].cp;
638         foundC        = UTEXT_NEXT32(ut);
639         TEST_ASSERT(expectedC == foundC);
640         if (gFailed) {
641             return;
642         }
643     }
644     foundC = UTEXT_NEXT32(ut);
645     TEST_ASSERT(foundC == U_SENTINEL);
646 
647     //
648     //  Forward iteration (above) should have left index at the
649     //   end of the input, which should == length().
650     //
651     len = utext_nativeLength(ut);
652     foundIndex  = utext_getNativeIndex(ut);
653     TEST_ASSERT(len == foundIndex);
654 
655     //
656     // Iterate backwards over entire test string
657     //
658     len = utext_getNativeIndex(ut);
659     utext_setNativeIndex(ut, len);
660     for (i=cpCount-1; i>=0; i--) {
661         expectedC     = cpMap[i].cp;
662         expectedIndex = cpMap[i].nativeIdx;
663         int64_t prevIndex = utext_getPreviousNativeIndex(ut);
664         foundC        = utext_previous32(ut);
665         foundIndex    = utext_getNativeIndex(ut);
666         TEST_ASSERT(expectedIndex == foundIndex);
667         TEST_ASSERT(expectedC == foundC);
668         TEST_ASSERT(prevIndex == foundIndex);
669         if (gFailed) {
670             return;
671         }
672     }
673 
674     //
675     //  Backwards iteration, above, should have left our iterator
676     //   position at zero, and continued backwards iterationshould fail.
677     //
678     foundIndex = utext_getNativeIndex(ut);
679     TEST_ASSERT(foundIndex == 0);
680     foundIndex = utext_getPreviousNativeIndex(ut);
681     TEST_ASSERT(foundIndex == 0);
682 
683 
684     foundC = utext_previous32(ut);
685     TEST_ASSERT(foundC == U_SENTINEL);
686     foundIndex = utext_getNativeIndex(ut);
687     TEST_ASSERT(foundIndex == 0);
688     foundIndex = utext_getPreviousNativeIndex(ut);
689     TEST_ASSERT(foundIndex == 0);
690 
691 
692     // And again, with the macros
693     utext_setNativeIndex(ut, len);
694     for (i=cpCount-1; i>=0; i--) {
695         expectedC     = cpMap[i].cp;
696         expectedIndex = cpMap[i].nativeIdx;
697         foundC        = UTEXT_PREVIOUS32(ut);
698         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
699         TEST_ASSERT(expectedIndex == foundIndex);
700         TEST_ASSERT(expectedC == foundC);
701         if (gFailed) {
702             return;
703         }
704     }
705 
706     //
707     //  Backwards iteration, above, should have left our iterator
708     //   position at zero, and continued backwards iterationshould fail.
709     //
710     foundIndex = UTEXT_GETNATIVEINDEX(ut);
711     TEST_ASSERT(foundIndex == 0);
712 
713     foundC = UTEXT_PREVIOUS32(ut);
714     TEST_ASSERT(foundC == U_SENTINEL);
715     foundIndex = UTEXT_GETNATIVEINDEX(ut);
716     TEST_ASSERT(foundIndex == 0);
717     if (gFailed) {
718         return;
719     }
720 
721     //
722     //  next32From(), prevous32From(), Iterate in a somewhat random order.
723     //
724     int  cpIndex = 0;
725     for (i=0; i<cpCount; i++) {
726         cpIndex = (cpIndex + 9973) % cpCount;
727         index         = cpMap[cpIndex].nativeIdx;
728         expectedC     = cpMap[cpIndex].cp;
729         foundC        = utext_next32From(ut, index);
730         TEST_ASSERT(expectedC == foundC);
731         if (gFailed) {
732             return;
733         }
734     }
735 
736     cpIndex = 0;
737     for (i=0; i<cpCount; i++) {
738         cpIndex = (cpIndex + 9973) % cpCount;
739         index         = cpMap[cpIndex+1].nativeIdx;
740         expectedC     = cpMap[cpIndex].cp;
741         foundC        = utext_previous32From(ut, index);
742         TEST_ASSERT(expectedC == foundC);
743         if (gFailed) {
744             return;
745         }
746     }
747 
748 
749     //
750     // moveIndex(int32_t delta);
751     //
752 
753     // Walk through frontwards, incrementing by one
754     utext_setNativeIndex(ut, 0);
755     for (i=1; i<=cpCount; i++) {
756         utext_moveIndex32(ut, 1);
757         index = utext_getNativeIndex(ut);
758         expectedIndex = cpMap[i].nativeIdx;
759         TEST_ASSERT(expectedIndex == index);
760         index = UTEXT_GETNATIVEINDEX(ut);
761         TEST_ASSERT(expectedIndex == index);
762     }
763 
764     // Walk through frontwards, incrementing by two
765     utext_setNativeIndex(ut, 0);
766     for (i=2; i<cpCount; i+=2) {
767         utext_moveIndex32(ut, 2);
768         index = utext_getNativeIndex(ut);
769         expectedIndex = cpMap[i].nativeIdx;
770         TEST_ASSERT(expectedIndex == index);
771         index = UTEXT_GETNATIVEINDEX(ut);
772         TEST_ASSERT(expectedIndex == index);
773     }
774 
775     // walk through the string backwards, decrementing by one.
776     i = cpMap[cpCount].nativeIdx;
777     utext_setNativeIndex(ut, i);
778     for (i=cpCount; i>=0; i--) {
779         expectedIndex = cpMap[i].nativeIdx;
780         index = utext_getNativeIndex(ut);
781         TEST_ASSERT(expectedIndex == index);
782         index = UTEXT_GETNATIVEINDEX(ut);
783         TEST_ASSERT(expectedIndex == index);
784         utext_moveIndex32(ut, -1);
785     }
786 
787 
788     // walk through backwards, decrementing by three
789     i = cpMap[cpCount].nativeIdx;
790     utext_setNativeIndex(ut, i);
791     for (i=cpCount; i>=0; i-=3) {
792         expectedIndex = cpMap[i].nativeIdx;
793         index = utext_getNativeIndex(ut);
794         TEST_ASSERT(expectedIndex == index);
795         index = UTEXT_GETNATIVEINDEX(ut);
796         TEST_ASSERT(expectedIndex == index);
797         utext_moveIndex32(ut, -3);
798     }
799 
800 
801     //
802     // Extract
803     //
804     int bufSize = us.length() + 10;
805     UChar *buf = new UChar[bufSize];
806     status = U_ZERO_ERROR;
807     expectedLen = us.length();
808     len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
809     TEST_SUCCESS(status);
810     TEST_ASSERT(len == expectedLen);
811     int compareResult = us.compare(buf, -1);
812     TEST_ASSERT(compareResult == 0);
813 
814     status = U_ZERO_ERROR;
815     len = utext_extract(ut, 0, utlen, NULL, 0, &status);
816     if (utlen == 0) {
817         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
818     } else {
819         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
820     }
821     TEST_ASSERT(len == expectedLen);
822 
823     status = U_ZERO_ERROR;
824     u_memset(buf, 0x5555, bufSize);
825     len = utext_extract(ut, 0, utlen, buf, 1, &status);
826     if (us.length() == 0) {
827         TEST_SUCCESS(status);
828         TEST_ASSERT(buf[0] == 0);
829     } else {
830         // Buf len == 1, extracting a single 16 bit value.
831         // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
832         //   or whether the lead surrogate of the pair is extracted.
833         //   It's a buffer overflow error in either case.
834         TEST_ASSERT(buf[0] == us.charAt(0) ||
835                     (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
836         TEST_ASSERT(buf[1] == 0x5555);
837         if (us.length() == 1) {
838             TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
839         } else {
840             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
841         }
842     }
843 
844     delete []buf;
845 }
846 
847 //
848 //  ErrorTest()    Check various error and edge cases.
849 //
ErrorTest()850 void UTextTest::ErrorTest()
851 {
852     // Close of an uninitialized UText.  Shouldn't blow up.
853     {
854         UText  ut;
855         memset(&ut, 0, sizeof(UText));
856         utext_close(&ut);
857         utext_close(NULL);
858     }
859 
860     // Double-close of a UText.  Shouldn't blow up.  UText should still be usable.
861     {
862         UErrorCode status = U_ZERO_ERROR;
863         UText ut = UTEXT_INITIALIZER;
864         UnicodeString s("Hello, World");
865         UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
866         TEST_SUCCESS(status);
867         TEST_ASSERT(ut2 == &ut);
868 
869         UText *ut3 = utext_close(&ut);
870         TEST_ASSERT(ut3 == &ut);
871 
872         UText *ut4 = utext_close(&ut);
873         TEST_ASSERT(ut4 == &ut);
874 
875         utext_openUnicodeString(&ut, &s, &status);
876         TEST_SUCCESS(status);
877         utext_close(&ut);
878     }
879 
880     // Re-use of a UText, chaining through each of the types of UText
881     //   (If it doesn't blow up, and doesn't leak, it's probably working fine)
882     {
883         UErrorCode status = U_ZERO_ERROR;
884         UText ut = UTEXT_INITIALIZER;
885         UText  *utp;
886         UnicodeString s1("Hello, World");
887         UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
888         const char  *s3 = "\x66\x67\x68";
889 
890         utp = utext_openUnicodeString(&ut, &s1, &status);
891         TEST_SUCCESS(status);
892         TEST_ASSERT(utp == &ut);
893 
894         utp = utext_openConstUnicodeString(&ut, &s1, &status);
895         TEST_SUCCESS(status);
896         TEST_ASSERT(utp == &ut);
897 
898         utp = utext_openUTF8(&ut, s3, -1, &status);
899         TEST_SUCCESS(status);
900         TEST_ASSERT(utp == &ut);
901 
902         utp = utext_openUChars(&ut, s2, -1, &status);
903         TEST_SUCCESS(status);
904         TEST_ASSERT(utp == &ut);
905 
906         utp = utext_close(&ut);
907         TEST_ASSERT(utp == &ut);
908 
909         utp = utext_openUnicodeString(&ut, &s1, &status);
910         TEST_SUCCESS(status);
911         TEST_ASSERT(utp == &ut);
912     }
913 
914     // Invalid parameters on open
915     //
916     {
917         UErrorCode status = U_ZERO_ERROR;
918         UText ut = UTEXT_INITIALIZER;
919 
920         utext_openUChars(&ut, NULL, 5, &status);
921         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
922 
923         status = U_ZERO_ERROR;
924         utext_openUChars(&ut, NULL, -1, &status);
925         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
926 
927         status = U_ZERO_ERROR;
928         utext_openUTF8(&ut, NULL, 4, &status);
929         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
930 
931         status = U_ZERO_ERROR;
932         utext_openUTF8(&ut, NULL, -1, &status);
933         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
934     }
935 
936     //
937     //  UTF-8 with malformed sequences.
938     //    These should come through as the Unicode replacement char, \ufffd
939     //
940     {
941         UErrorCode status = U_ZERO_ERROR;
942         UText *ut = NULL;
943         const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
944         UChar32  c;
945 
946         ut = utext_openUTF8(NULL, badUTF8, -1, &status);
947         TEST_SUCCESS(status);
948         c = utext_char32At(ut, 1);
949         TEST_ASSERT(c == 0xfffd);
950         c = utext_char32At(ut, 3);
951         TEST_ASSERT(c == 0xfffd);
952         c = utext_char32At(ut, 5);
953         TEST_ASSERT(c == 0xfffd);
954         c = utext_char32At(ut, 6);
955         TEST_ASSERT(c == 0x43);
956 
957         UChar buf[10];
958         int n = utext_extract(ut, 0, 9, buf, 10, &status);
959         TEST_SUCCESS(status);
960         TEST_ASSERT(n==5);
961         TEST_ASSERT(buf[1] == 0xfffd);
962         TEST_ASSERT(buf[3] == 0xfffd);
963         TEST_ASSERT(buf[2] == 0x42);
964         utext_close(ut);
965     }
966 
967 
968     //
969     //  isLengthExpensive - does it make the exptected transitions after
970     //                      getting the length of a nul terminated string?
971     //
972     {
973         UErrorCode status = U_ZERO_ERROR;
974         UnicodeString sa("Hello, this is a string");
975         UBool  isExpensive;
976 
977         UChar sb[100];
978         memset(sb, 0x20, sizeof(sb));
979         sb[99] = 0;
980 
981         UText *uta = utext_openUnicodeString(NULL, &sa, &status);
982         TEST_SUCCESS(status);
983         isExpensive = utext_isLengthExpensive(uta);
984         TEST_ASSERT(isExpensive == FALSE);
985         utext_close(uta);
986 
987         UText *utb = utext_openUChars(NULL, sb, -1, &status);
988         TEST_SUCCESS(status);
989         isExpensive = utext_isLengthExpensive(utb);
990         TEST_ASSERT(isExpensive == TRUE);
991         int64_t  len = utext_nativeLength(utb);
992         TEST_ASSERT(len == 99);
993         isExpensive = utext_isLengthExpensive(utb);
994         TEST_ASSERT(isExpensive == FALSE);
995         utext_close(utb);
996     }
997 
998     //
999     // Index to positions not on code point boundaries.
1000     //
1001     {
1002         const char *u8str =         "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
1003         int32_t startMap[] =        {   0,  0,  2,  2,  2,  5,  5,  5,  5,  9,  9};
1004         int32_t nextMap[]  =        {   2,  2,  5,  5,  5,  9,  9,  9,  9,  9,  9};
1005         int32_t prevMap[]  =        {   0,  0,  0,  0,  0,  2,  2,  2,  2,  5,  5};
1006         UChar32  c32Map[] =    {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
1007         UChar32  pr32Map[] =   {    -1,   -1,  0x201,  0x201,  0x201,   0x1083,   0x1083,   0x1083,   0x1083, 0x044146, 0x044146};
1008 
1009         // extractLen is the size, in UChars, of what will be extracted between index and index+1.
1010         //  is zero when both index positions lie within the same code point.
1011         int32_t  exLen[] =          {   0,  1,   0,  0,  1,  0,  0,  0,  2,  0,  0};
1012 
1013 
1014         UErrorCode status = U_ZERO_ERROR;
1015         UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
1016         TEST_SUCCESS(status);
1017 
1018         // Check setIndex
1019         int32_t i;
1020         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
1021         for (i=0; i<startMapLimit; i++) {
1022             utext_setNativeIndex(ut, i);
1023             int64_t cpIndex = utext_getNativeIndex(ut);
1024             TEST_ASSERT(cpIndex == startMap[i]);
1025             cpIndex = UTEXT_GETNATIVEINDEX(ut);
1026             TEST_ASSERT(cpIndex == startMap[i]);
1027         }
1028 
1029         // Check char32At
1030         for (i=0; i<startMapLimit; i++) {
1031             UChar32 c32 = utext_char32At(ut, i);
1032             TEST_ASSERT(c32 == c32Map[i]);
1033             int64_t cpIndex = utext_getNativeIndex(ut);
1034             TEST_ASSERT(cpIndex == startMap[i]);
1035         }
1036 
1037         // Check utext_next32From
1038         for (i=0; i<startMapLimit; i++) {
1039             UChar32 c32 = utext_next32From(ut, i);
1040             TEST_ASSERT(c32 == c32Map[i]);
1041             int64_t cpIndex = utext_getNativeIndex(ut);
1042             TEST_ASSERT(cpIndex == nextMap[i]);
1043         }
1044 
1045         // check utext_previous32From
1046         for (i=0; i<startMapLimit; i++) {
1047             gTestNum++;
1048             UChar32 c32 = utext_previous32From(ut, i);
1049             TEST_ASSERT(c32 == pr32Map[i]);
1050             int64_t cpIndex = utext_getNativeIndex(ut);
1051             TEST_ASSERT(cpIndex == prevMap[i]);
1052         }
1053 
1054         // check Extract
1055         //   Extract from i to i+1, which may be zero or one code points,
1056         //     depending on whether the indices straddle a cp boundary.
1057         for (i=0; i<startMapLimit; i++) {
1058             UChar buf[3];
1059             status = U_ZERO_ERROR;
1060             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1061             TEST_SUCCESS(status);
1062             TEST_ASSERT(extractedLen == exLen[i]);
1063             if (extractedLen > 0) {
1064                 UChar32  c32;
1065                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1066                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1067                 TEST_ASSERT(c32 == c32Map[i]);
1068             }
1069         }
1070 
1071         utext_close(ut);
1072     }
1073 
1074 
1075     {    //  Similar test, with utf16 instead of utf8
1076          //  TODO:  merge the common parts of these tests.
1077 
1078         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
1079         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
1080         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
1081         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
1082         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
1083         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
1084         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
1085 
1086         u16str = u16str.unescape();
1087         UErrorCode status = U_ZERO_ERROR;
1088         UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
1089         TEST_SUCCESS(status);
1090 
1091         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
1092         int i;
1093         for (i=0; i<startMapLimit; i++) {
1094             utext_setNativeIndex(ut, i);
1095             int64_t cpIndex = utext_getNativeIndex(ut);
1096             TEST_ASSERT(cpIndex == startMap[i]);
1097         }
1098 
1099         // Check char32At
1100         for (i=0; i<startMapLimit; i++) {
1101             UChar32 c32 = utext_char32At(ut, i);
1102             TEST_ASSERT(c32 == c32Map[i]);
1103             int64_t cpIndex = utext_getNativeIndex(ut);
1104             TEST_ASSERT(cpIndex == startMap[i]);
1105         }
1106 
1107         // Check utext_next32From
1108         for (i=0; i<startMapLimit; i++) {
1109             UChar32 c32 = utext_next32From(ut, i);
1110             TEST_ASSERT(c32 == c32Map[i]);
1111             int64_t cpIndex = utext_getNativeIndex(ut);
1112             TEST_ASSERT(cpIndex == nextMap[i]);
1113         }
1114 
1115         // check utext_previous32From
1116         for (i=0; i<startMapLimit; i++) {
1117             UChar32 c32 = utext_previous32From(ut, i);
1118             TEST_ASSERT(c32 == pr32Map[i]);
1119             int64_t cpIndex = utext_getNativeIndex(ut);
1120             TEST_ASSERT(cpIndex == prevMap[i]);
1121         }
1122 
1123         // check Extract
1124         //   Extract from i to i+1, which may be zero or one code points,
1125         //     depending on whether the indices straddle a cp boundary.
1126         for (i=0; i<startMapLimit; i++) {
1127             UChar buf[3];
1128             status = U_ZERO_ERROR;
1129             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1130             TEST_SUCCESS(status);
1131             TEST_ASSERT(extractedLen == exLen[i]);
1132             if (extractedLen > 0) {
1133                 UChar32  c32;
1134                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1135                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1136                 TEST_ASSERT(c32 == c32Map[i]);
1137             }
1138         }
1139 
1140         utext_close(ut);
1141     }
1142 
1143     {    //  Similar test, with UText over Replaceable
1144          //  TODO:  merge the common parts of these tests.
1145 
1146         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
1147         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
1148         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
1149         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
1150         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
1151         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
1152         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
1153 
1154         u16str = u16str.unescape();
1155         UErrorCode status = U_ZERO_ERROR;
1156         UText *ut = utext_openReplaceable(NULL, &u16str, &status);
1157         TEST_SUCCESS(status);
1158 
1159         int32_t startMapLimit = UPRV_LENGTHOF(startMap);
1160         int i;
1161         for (i=0; i<startMapLimit; i++) {
1162             utext_setNativeIndex(ut, i);
1163             int64_t cpIndex = utext_getNativeIndex(ut);
1164             TEST_ASSERT(cpIndex == startMap[i]);
1165         }
1166 
1167         // Check char32At
1168         for (i=0; i<startMapLimit; i++) {
1169             UChar32 c32 = utext_char32At(ut, i);
1170             TEST_ASSERT(c32 == c32Map[i]);
1171             int64_t cpIndex = utext_getNativeIndex(ut);
1172             TEST_ASSERT(cpIndex == startMap[i]);
1173         }
1174 
1175         // Check utext_next32From
1176         for (i=0; i<startMapLimit; i++) {
1177             UChar32 c32 = utext_next32From(ut, i);
1178             TEST_ASSERT(c32 == c32Map[i]);
1179             int64_t cpIndex = utext_getNativeIndex(ut);
1180             TEST_ASSERT(cpIndex == nextMap[i]);
1181         }
1182 
1183         // check utext_previous32From
1184         for (i=0; i<startMapLimit; i++) {
1185             UChar32 c32 = utext_previous32From(ut, i);
1186             TEST_ASSERT(c32 == pr32Map[i]);
1187             int64_t cpIndex = utext_getNativeIndex(ut);
1188             TEST_ASSERT(cpIndex == prevMap[i]);
1189         }
1190 
1191         // check Extract
1192         //   Extract from i to i+1, which may be zero or one code points,
1193         //     depending on whether the indices straddle a cp boundary.
1194         for (i=0; i<startMapLimit; i++) {
1195             UChar buf[3];
1196             status = U_ZERO_ERROR;
1197             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1198             TEST_SUCCESS(status);
1199             TEST_ASSERT(extractedLen == exLen[i]);
1200             if (extractedLen > 0) {
1201                 UChar32  c32;
1202                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1203                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1204                 TEST_ASSERT(c32 == c32Map[i]);
1205             }
1206         }
1207 
1208         utext_close(ut);
1209     }
1210 }
1211 
1212 
FreezeTest()1213 void UTextTest::FreezeTest() {
1214     // Check isWritable() and freeze() behavior.
1215     //
1216 
1217     UnicodeString  ustr("Hello, World.");
1218     const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
1219     const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
1220 
1221     UErrorCode status = U_ZERO_ERROR;
1222     UText  *ut        = NULL;
1223     UText  *ut2       = NULL;
1224 
1225     ut = utext_openUTF8(ut, u8str, -1, &status);
1226     TEST_SUCCESS(status);
1227     UBool writable = utext_isWritable(ut);
1228     TEST_ASSERT(writable == FALSE);
1229     utext_copy(ut, 1, 2, 0, TRUE, &status);
1230     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1231 
1232     status = U_ZERO_ERROR;
1233     ut = utext_openUChars(ut, u16str, -1, &status);
1234     TEST_SUCCESS(status);
1235     writable = utext_isWritable(ut);
1236     TEST_ASSERT(writable == FALSE);
1237     utext_copy(ut, 1, 2, 0, TRUE, &status);
1238     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1239 
1240     status = U_ZERO_ERROR;
1241     ut = utext_openUnicodeString(ut, &ustr, &status);
1242     TEST_SUCCESS(status);
1243     writable = utext_isWritable(ut);
1244     TEST_ASSERT(writable == TRUE);
1245     utext_freeze(ut);
1246     writable = utext_isWritable(ut);
1247     TEST_ASSERT(writable == FALSE);
1248     utext_copy(ut, 1, 2, 0, TRUE, &status);
1249     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1250 
1251     status = U_ZERO_ERROR;
1252     ut = utext_openUnicodeString(ut, &ustr, &status);
1253     TEST_SUCCESS(status);
1254     ut2 = utext_clone(ut2, ut, FALSE, FALSE, &status);  // clone with readonly = false
1255     TEST_SUCCESS(status);
1256     writable = utext_isWritable(ut2);
1257     TEST_ASSERT(writable == TRUE);
1258     ut2 = utext_clone(ut2, ut, FALSE, TRUE, &status);  // clone with readonly = true
1259     TEST_SUCCESS(status);
1260     writable = utext_isWritable(ut2);
1261     TEST_ASSERT(writable == FALSE);
1262     utext_copy(ut2, 1, 2, 0, TRUE, &status);
1263     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1264 
1265     status = U_ZERO_ERROR;
1266     ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
1267     TEST_SUCCESS(status);
1268     writable = utext_isWritable(ut);
1269     TEST_ASSERT(writable == FALSE);
1270     utext_copy(ut, 1, 2, 0, TRUE, &status);
1271     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1272 
1273     // Deep Clone of a frozen UText should re-enable writing in the copy.
1274     status = U_ZERO_ERROR;
1275     ut = utext_openUnicodeString(ut, &ustr, &status);
1276     TEST_SUCCESS(status);
1277     utext_freeze(ut);
1278     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1279     TEST_SUCCESS(status);
1280     writable = utext_isWritable(ut2);
1281     TEST_ASSERT(writable == TRUE);
1282 
1283 
1284     // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
1285     //  should NOT enable writing in the copy.
1286     status = U_ZERO_ERROR;
1287     ut = utext_openUChars(ut, u16str, -1, &status);
1288     TEST_SUCCESS(status);
1289     utext_freeze(ut);
1290     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1291     TEST_SUCCESS(status);
1292     writable = utext_isWritable(ut2);
1293     TEST_ASSERT(writable == FALSE);
1294 
1295     // cleanup
1296     utext_close(ut);
1297     utext_close(ut2);
1298 }
1299 
1300 
1301 //
1302 //  Fragmented UText
1303 //      A UText type that works with a chunk size of 1.
1304 //      Intended to test for edge cases.
1305 //      Input comes from a UnicodeString.
1306 //
1307 //       ut.b    the character.  Put into both halves.
1308 //
1309 
1310 U_CDECL_BEGIN
1311 static UBool U_CALLCONV
fragTextAccess(UText * ut,int64_t index,UBool forward)1312 fragTextAccess(UText *ut, int64_t index, UBool forward) {
1313     const UnicodeString *us = (const UnicodeString *)ut->context;
1314     UChar  c;
1315     int32_t length = us->length();
1316     if (forward && index>=0 && index<length) {
1317         c = us->charAt((int32_t)index);
1318         ut->b = c | c<<16;
1319         ut->chunkOffset = 0;
1320         ut->chunkLength = 1;
1321         ut->chunkNativeStart = index;
1322         ut->chunkNativeLimit = index+1;
1323         return true;
1324     }
1325     if (!forward && index>0 && index <=length) {
1326         c = us->charAt((int32_t)index-1);
1327         ut->b = c | c<<16;
1328         ut->chunkOffset = 1;
1329         ut->chunkLength = 1;
1330         ut->chunkNativeStart = index-1;
1331         ut->chunkNativeLimit = index;
1332         return true;
1333     }
1334     ut->b = 0;
1335     ut->chunkOffset = 0;
1336     ut->chunkLength = 0;
1337     if (index <= 0) {
1338         ut->chunkNativeStart = 0;
1339         ut->chunkNativeLimit = 0;
1340     } else {
1341         ut->chunkNativeStart = length;
1342         ut->chunkNativeLimit = length;
1343     }
1344     return false;
1345 }
1346 
// Function table to be used with this fragmented text provider.
//   Initialized in the open function (openFragmentedUnicodeString), which
//   copies the stock UnicodeString UText table into it and then replaces the
//   access and clone entries.
//   There is a single table: every UText opened by that function points its
//   pFuncs at this same object.
static UTextFuncs  fragmentFuncs;
1350 
1351 // Clone function for fragmented text provider.
1352 //   Didn't really want to provide this, but it's easier to provide it than to keep it
1353 //   out of the tests.
1354 //
1355 UText *
cloneFragmentedUnicodeString(UText * dest,const UText * src,UBool deep,UErrorCode * status)1356 cloneFragmentedUnicodeString(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
1357     if (U_FAILURE(*status)) {
1358         return NULL;
1359     }
1360     if (deep) {
1361         *status = U_UNSUPPORTED_ERROR;
1362         return NULL;
1363     }
1364     dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
1365     utext_setNativeIndex(dest, utext_getNativeIndex(src));
1366     return dest;
1367 }
1368 
1369 U_CDECL_END
1370 
1371 // Open function for the fragmented text provider.
1372 UText *
openFragmentedUnicodeString(UText * ut,UnicodeString * s,UErrorCode * status)1373 openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
1374     ut = utext_openUnicodeString(ut, s, status);
1375     if (U_FAILURE(*status)) {
1376         return ut;
1377     }
1378 
1379     // Copy of the function table from the stock UnicodeString UText,
1380     //   and replace the entry for the access function.
1381     memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
1382     fragmentFuncs.access = fragTextAccess;
1383     fragmentFuncs.clone  = cloneFragmentedUnicodeString;
1384     ut->pFuncs = &fragmentFuncs;
1385 
1386     ut->chunkContents = (UChar *)&ut->b;
1387     ut->pFuncs->access(ut, 0, TRUE);
1388     return ut;
1389 }
1390 
1391 // Regression test for Ticket 5560
1392 //   Clone fails to update chunkContentPointer in the cloned copy.
1393 //   This is only an issue for UText types that work in a local buffer,
1394 //      (UTF-8 wrapper, for example)
1395 //
1396 //   The test:
//     1.  Create an initial UText
1398 //     2.  Deep clone it.  Contents should match original.
1399 //     3.  Reset original to something different.
1400 //     4.  Check that clone contents did not change.
1401 //
Ticket5560()1402 void UTextTest::Ticket5560() {
1403     /* The following two strings are in UTF-8 even on EBCDIC platforms. */
1404     static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
1405     static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
1406 	UErrorCode status = U_ZERO_ERROR;
1407 
1408 	UText ut1 = UTEXT_INITIALIZER;
1409 	UText ut2 = UTEXT_INITIALIZER;
1410 
1411 	utext_openUTF8(&ut1, s1, -1, &status);
1412 	UChar c = utext_next32(&ut1);
1413 	TEST_ASSERT(c == 0x41);  // c == 'A'
1414 
1415 	utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
1416 	TEST_SUCCESS(status);
1417     c = utext_next32(&ut2);
1418 	TEST_ASSERT(c == 0x42);  // c == 'B'
1419     c = utext_next32(&ut1);
1420 	TEST_ASSERT(c == 0x42);  // c == 'B'
1421 
1422 	utext_openUTF8(&ut1, s2, -1, &status);
1423 	c = utext_next32(&ut1);
1424 	TEST_ASSERT(c == 0x31);  // c == '1'
1425     c = utext_next32(&ut2);
1426 	TEST_ASSERT(c == 0x43);  // c == 'C'
1427 
1428     utext_close(&ut1);
1429     utext_close(&ut2);
1430 }
1431 
1432 
1433 // Test for Ticket 6847
1434 //
Ticket6847()1435 void UTextTest::Ticket6847() {
1436     const int STRLEN = 90;
1437     UChar s[STRLEN+1];
1438     u_memset(s, 0x41, STRLEN);
1439     s[STRLEN] = 0;
1440 
1441     UErrorCode status = U_ZERO_ERROR;
1442     UText *ut = utext_openUChars(NULL, s, -1, &status);
1443 
1444     utext_setNativeIndex(ut, 0);
1445     int32_t count = 0;
1446     UChar32 c = 0;
1447     int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1448     TEST_ASSERT(nativeIndex == 0);
1449     while ((c = utext_next32(ut)) != U_SENTINEL) {
1450         TEST_ASSERT(c == 0x41);
1451         TEST_ASSERT(count < STRLEN);
1452         if (count >= STRLEN) {
1453             break;
1454         }
1455         count++;
1456         nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1457         TEST_ASSERT(nativeIndex == count);
1458     }
1459     TEST_ASSERT(count == STRLEN);
1460     nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1461     TEST_ASSERT(nativeIndex == STRLEN);
1462     utext_close(ut);
1463 }
1464 
1465 
Ticket10562()1466 void UTextTest::Ticket10562() {
1467     // Note: failures show as a heap error when the test is run under valgrind.
1468     UErrorCode status = U_ZERO_ERROR;
1469 
1470     const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
1471     UText *utf8Text = utext_openUTF8(NULL, utf8_string, -1, &status);
1472     TEST_SUCCESS(status);
1473     UText *deepClone = utext_clone(NULL, utf8Text, TRUE, FALSE, &status);
1474     TEST_SUCCESS(status);
1475     UText *shallowClone = utext_clone(NULL, deepClone, FALSE, FALSE, &status);
1476     TEST_SUCCESS(status);
1477     utext_close(shallowClone);
1478     utext_close(deepClone);
1479     utext_close(utf8Text);
1480 
1481     status = U_ZERO_ERROR;
1482     UnicodeString usString("Hello, World.");
1483     UText *usText = utext_openUnicodeString(NULL, &usString, &status);
1484     TEST_SUCCESS(status);
1485     UText *usDeepClone = utext_clone(NULL, usText, TRUE, FALSE, &status);
1486     TEST_SUCCESS(status);
1487     UText *usShallowClone = utext_clone(NULL, usDeepClone, FALSE, FALSE, &status);
1488     TEST_SUCCESS(status);
1489     utext_close(usShallowClone);
1490     utext_close(usDeepClone);
1491     utext_close(usText);
1492 }
1493 
1494 
Ticket10983()1495 void UTextTest::Ticket10983() {
1496     // Note: failure shows as a seg fault when the defect is present.
1497 
1498     UErrorCode status = U_ZERO_ERROR;
1499     UnicodeString s("Hello, World");
1500     UText *ut = utext_openConstUnicodeString(NULL, &s, &status);
1501     TEST_SUCCESS(status);
1502 
1503     status = U_INVALID_STATE_ERROR;
1504     UText *cloned = utext_clone(NULL, ut, TRUE, TRUE, &status);
1505     TEST_ASSERT(cloned == NULL);
1506     TEST_ASSERT(status == U_INVALID_STATE_ERROR);
1507 
1508     utext_close(ut);
1509 }
1510 
1511 // Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
1512 //                leaves the iteration position set incorrectly when the
1513 //                actual string length is not yet known.
1514 //
1515 //                The test text needs to be long enough that UText defers getting the length.
1516 
Ticket12130()1517 void UTextTest::Ticket12130() {
1518     UErrorCode status = U_ZERO_ERROR;
1519 
1520     const char *text8 =
1521         "Fundamentally, computers just deal with numbers. They store letters and other characters "
1522         "by assigning a number for each one. Before Unicode was invented, there were hundreds "
1523         "of different encoding systems for assigning these numbers. No single encoding could "
1524         "contain enough characters: for example, the European Union alone requires several "
1525         "different encodings to cover all its languages. Even for a single language like "
1526         "English no single encoding was adequate for all the letters, punctuation, and technical "
1527         "symbols in common use.";
1528 
1529     UnicodeString str(text8);
1530     const UChar *ustr = str.getTerminatedBuffer();
1531     UText ut = UTEXT_INITIALIZER;
1532     utext_openUChars(&ut, ustr, -1, &status);
1533     UChar extractBuffer[50];
1534 
1535     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
1536         int32_t endIdx = startIdx + 20;
1537 
1538         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
1539         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
1540         if (U_FAILURE(status)) {
1541             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
1542             return;
1543         }
1544         int64_t ni  = utext_getNativeIndex(&ut);
1545         int64_t expectedni = startIdx + 20;
1546         if (expectedni > str.length()) {
1547             expectedni = str.length();
1548         }
1549         if (expectedni != ni) {
1550             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
1551         }
1552         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
1553             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1554                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
1555         }
1556     }
1557     utext_close(&ut);
1558 
1559     // Similar utext extract, this time with the string length provided to the UText in advance,
1560     // and a buffer of larger than required capacity.
1561 
1562     utext_openUChars(&ut, ustr, str.length(), &status);
1563     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
1564         int32_t endIdx = startIdx + 20;
1565         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
1566         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
1567         if (U_FAILURE(status)) {
1568             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
1569             return;
1570         }
1571         int64_t ni  = utext_getNativeIndex(&ut);
1572         int64_t expectedni = startIdx + 20;
1573         if (expectedni > str.length()) {
1574             expectedni = str.length();
1575         }
1576         if (expectedni != ni) {
1577             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
1578         }
1579         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
1580             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1581                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
1582         }
1583     }
1584     utext_close(&ut);
1585 }
1586