• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33 
34 static void
35 TestAPI(void);
36 
37 static void
38 TestNormCoverage(void);
39 
40 static void
41 TestConcatenate(void);
42 
43 static void
44 TestNextPrevious(void);
45 
46 static void TestIsNormalized(void);
47 
48 static void
49 TestFCNFKCClosure(void);
50 
51 static void
52 TestQuickCheckPerCP(void);
53 
54 static void
55 TestComposition(void);
56 
57 static void
58 TestFCD(void);
59 
60 static void
61 TestGetDecomposition(void);
62 
63 static void
64 TestGetRawDecomposition(void);
65 
66 static void TestAppendRestoreMiddle(void);
67 static void TestGetEasyToUseInstance(void);
68 
/*
 * Canonical normalization cases. Every string is written with doubled
 * backslashes so the \uXXXX escapes survive until CharsToUChars() expands them.
 * Columns: [0] input, [1] expected canonical decomposition,
 *          [2] expected canonical composition.
 * The closing row of empty strings is an ordinary (empty) test case.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII and a precomposed a-grave */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above, precomposed and spelled out */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D-dot_below + dot_above */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D-dot_above + dot_below */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D + dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D-cedilla + dot_above + dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D + dot_above + ogonek + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    /* E-macron + grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave + macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom sign */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    /* A-ring */
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    /* the ffi ligature is left alone by canonical forms */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* the Roman numeral four is left alone by canonical forms */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },
    /* A + grave (above) + grave below: the marks get reordered */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    { "", "", "" }
};
105 
/*
 * Compatibility normalization cases, same escaping convention as the
 * canonical table (doubled backslashes, expanded by CharsToUChars()).
 * Columns: [0] input, [1] expected compatibility decomposition,
 *          [2] expected compatibility composition.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed ligature vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    { "", "", "" }
};
128 
/*
 * FCD cases: [0] input, [1] expected result; column [2] is unused (NULL).
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134 
135 void addNormTest(TestNode** root);
136 
addNormTest(TestNode ** root)137 void addNormTest(TestNode** root)
138 {
139     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 }
160 
/*
 * Printable names for UNormalizationMode, indexed directly by the enum value
 * (callers use modeStrings[mode]).
 * The enum starts at UNORM_NONE=1, so index 0 is a placeholder; without it
 * every lookup would yield the name of the following mode.
 */
static const char* const modeStrings[]={
    "(invalid mode 0)",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
170 
/*
 * Runs unorm_normalize() in the given mode over each row of cases[].
 *
 * For every row the input (column 0) is normalized four times: two preflight
 * calls (explicit source length, then -1 for NUL-terminated) whose lengths
 * must agree, and two real calls into a local buffer whose output is compared
 * with the expected column via assertEqual().
 *
 * mode           normalization mode under test
 * cases          table of escaped strings: input, decomposed, composed
 * lengthOfCases  number of rows in cases[]
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    int32_t x, neededLen, length2;
    /* Composing modes are checked against column 2, all other modes against column 1. */
    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar *source=NULL;
    UChar result[16];
    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(x=0; x < lengthOfCases; x++)
    {
        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
        source=CharsToUChars(cases[x][0]);

        /* neededLen: preflight with explicit length; length2: preflight with -1 (NUL-terminated). */
        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen!=length2) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }
        /* An overflow is the expected outcome of preflighting into a NULL buffer. */
        if(status==U_BUFFER_OVERFLOW_ERROR)
        {
            status=U_ZERO_ERROR;
        }

        /* Real run with an explicit source length. */
        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }

        /* Real run with a NUL-terminated source; start clean so that a failure
         * of the previous call is not reported a second time. */
        status=U_ZERO_ERROR;
        length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }
        free(source);
    }
}
210 
TestDecomp()211 void TestDecomp() {
212     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
213 }
214 
TestCompatDecomp()215 void TestCompatDecomp() {
216     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
217 }
218 
TestCanonDecompCompose()219 void TestCanonDecompCompose() {
220     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
221 }
222 
TestCompatDecompCompose()223 void TestCompatDecompCompose() {
224     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
225 }
226 
TestFCD()227 void TestFCD() {
228     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
229 }
230 
/*
 * Compares a normalization result with its expected value and logs an error
 * when they differ.
 *
 * result    NUL-terminated UTF-16 output to check
 * expected  expected text in escaped char* form; converted with CharsToUChars()
 * index     row number of the test case, used only in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUChars = CharsToUChars(expected);
    const int32_t difference = u_strcmp(result, expectedUChars);

    if(difference != 0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n",
                index, expected, austrdup(result));
    }

    /* The converted copy is released here with free(). */
    free(expectedUChars);
}
240 
/*
 * Normalizes src (which may contain embedded U+0000) and compares the output
 * with exp, logging the first difference.
 *
 * src, srcLen  input code units and their count
 * exp, expLen  expected output code units and their count
 * mode         normalization mode to apply
 * name         mode name used in log messages
 *
 * The function stops when normalization fails, and never compares more code
 * units than both the result and the expectation actually hold; otherwise a
 * wrong length would make it read past the end of exp[] or result[].
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len, i, compareLen;
    UBool lengthMatches;

    UChar   result[50];

    /* Pre-fill with U+FFFD so untouched slots are recognizable. */
    for(i=0;i<UPRV_LENGTHOF(result);i++)
      {
        result[i] = 0xFFFD;
      }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
      /* len is not a trustworthy index after a failure. */
      return;
    }

    lengthMatches = (UBool)(len == expLen);
    if(!lengthMatches) {
      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
    }

    /* Compare only the part that exists in both arrays. */
    compareLen = (len < expLen) ? len : expLen;
    for(i=0;i<compareLen;i++){
      if(exp[i] != result[i]) {
        log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                name,
                i,
                exp[i],
                result[i]);
        return;
      }
      log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    /* Report success only when both the length and the content matched. */
    if(lengthMatches) {
      log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
283 
TestNull()284 void TestNull()
285 {
286 
287     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
288     int32_t source_comp_len = 4;
289     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
290     int32_t expect_comp_len = 3;
291 
292     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
293     int32_t source_dcmp_len = 3;
294     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
295     int32_t expect_dcmp_len = 5;
296 
297     TestNull_check(source_comp,
298                    source_comp_len,
299                    expect_comp,
300                    expect_comp_len,
301                    UNORM_NFC,
302                    "UNORM_NFC");
303 
304     TestNull_check(source_dcmp,
305                    source_dcmp_len,
306                    expect_dcmp,
307                    expect_dcmp_len,
308                    UNORM_NFD,
309                    "UNORM_NFD");
310 
311     TestNull_check(source_comp,
312                    source_comp_len,
313                    expect_comp,
314                    expect_comp_len,
315                    UNORM_NFKC,
316                    "UNORM_NFKC");
317 
318 
319 }
320 
TestQuickCheckResultNO()321 static void TestQuickCheckResultNO()
322 {
323   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
324                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
325   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
326                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
327   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
328                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
329   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
330                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
331 
332 
333   const int SIZE = 10;
334 
335   int count = 0;
336   UErrorCode error = U_ZERO_ERROR;
337 
338   for (; count < SIZE; count ++)
339   {
340     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
341                                                               UNORM_NO)
342     {
343       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
344       return;
345     }
346     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
347                                                               UNORM_NO)
348     {
349       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
350       return;
351     }
352     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
353                                                               UNORM_NO)
354     {
355       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
356       return;
357     }
358     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
359                                                               UNORM_NO)
360     {
361       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
362       return;
363     }
364   }
365 }
366 
367 
TestQuickCheckResultYES()368 static void TestQuickCheckResultYES()
369 {
370   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
371                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
372   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
373                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
374   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
375                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
376   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
377                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
378 
379   const int SIZE = 10;
380   int count = 0;
381   UErrorCode error = U_ZERO_ERROR;
382 
383   UChar cp = 0;
384   while (cp < 0xA0)
385   {
386     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
387     {
388       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
389       return;
390     }
391     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
392                                                              UNORM_YES)
393     {
394       log_err("ERROR in NFC quick check at U+%04x\n", cp);
395       return;
396     }
397     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
398     {
399       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
400       return;
401     }
402     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
403                                                              UNORM_YES)
404     {
405       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
406       return;
407     }
408     cp ++;
409   }
410 
411   for (; count < SIZE; count ++)
412   {
413     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
414                                                              UNORM_YES)
415     {
416       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
417       return;
418     }
419     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
420                                                           != UNORM_YES)
421     {
422       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
423       return;
424     }
425     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
426                                                              UNORM_YES)
427     {
428       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
429       return;
430     }
431     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
432                                                              UNORM_YES)
433     {
434       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
435       return;
436     }
437   }
438 }
439 
TestQuickCheckResultMAYBE()440 static void TestQuickCheckResultMAYBE()
441 {
442   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
443                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
444   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
445                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
446 
447 
448   const int SIZE = 10;
449 
450   int count = 0;
451   UErrorCode error = U_ZERO_ERROR;
452 
453   /* NFD and NFKD does not have any MAYBE codepoints */
454   for (; count < SIZE; count ++)
455   {
456     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
457                                                            UNORM_MAYBE)
458     {
459       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
460       return;
461     }
462     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
463                                                            UNORM_MAYBE)
464     {
465       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
466       return;
467     }
468   }
469 }
470 
TestQuickCheckStringResult()471 static void TestQuickCheckStringResult()
472 {
473   int count;
474   UChar *d = NULL;
475   UChar *c = NULL;
476   UErrorCode error = U_ZERO_ERROR;
477 
478   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
479   {
480     d = CharsToUChars(canonTests[count][1]);
481     c = CharsToUChars(canonTests[count][2]);
482     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
483                                                             UNORM_YES)
484     {
485       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
486       return;
487     }
488 
489     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
490                                                             UNORM_NO)
491     {
492       log_err("ERROR in NFC quick check for string at count %d\n", count);
493       return;
494     }
495 
496     free(d);
497     free(c);
498   }
499 
500   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
501   {
502     d = CharsToUChars(compatTests[count][1]);
503     c = CharsToUChars(compatTests[count][2]);
504     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
505                                                             UNORM_YES)
506     {
507       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
508       return;
509     }
510 
511     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
512                                                             UNORM_YES)
513     {
514       log_err("ERROR in NFKC quick check for string at count %d\n", count);
515       return;
516     }
517 
518     free(d);
519     free(c);
520   }
521 }
522 
/*
 * Entry point for the quick-check tests: runs the NO, YES and MAYBE
 * single-code-unit checks, then the whole-string checks, in that order.
 */
void TestQuickCheck(void)
{
  /* per code unit */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* per string */
  TestQuickCheckStringResult();
}
530 
531 /*
532  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
533  * normalized, and some that are not.
534  * Here we pick some specific cases and test the C API.
535  */
TestIsNormalized(void)536 static void TestIsNormalized(void) {
537     static const UChar notNFC[][8]={            /* strings that are not in NFC */
538         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
539         { 0xfb1d, 0 },                          /* excluded from composition */
540         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
541         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
542     };
543     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
544         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
545         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
546         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
547     };
548 
549     int32_t i;
550     UErrorCode errorCode;
551 
552     /* API test */
553 
554     /* normal case with length>=0 (length -1 used for special cases below) */
555     errorCode=U_ZERO_ERROR;
556     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
557         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
558     }
559 
560     /* incoming U_FAILURE */
561     errorCode=U_TRUNCATED_CHAR_FOUND;
562     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
563     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
564         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
565     }
566 
567     /* NULL source */
568     errorCode=U_ZERO_ERROR;
569     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
570     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
571         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
572     }
573 
574     /* bad length */
575     errorCode=U_ZERO_ERROR;
576     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
577     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
578         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
579     }
580 
581     /* specific cases */
582     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
583         errorCode=U_ZERO_ERROR;
584         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
585             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
586         }
587         errorCode=U_ZERO_ERROR;
588         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
589             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
590         }
591     }
592     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
593         errorCode=U_ZERO_ERROR;
594         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
595             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
596         }
597     }
598 }
599 
TestCheckFCD()600 void TestCheckFCD()
601 {
602   UErrorCode status = U_ZERO_ERROR;
603   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
604                          0x0A};
605   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
606                           0x02B9, 0x0314, 0x0315, 0x0316};
607   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
608                          0x0050, 0x0730, 0x09EE, 0x1E10};
609 
610   static const UChar datastr[][5] =
611   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
612     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
613     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
614     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
615   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
616 
617   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
618                             0x6a,
619                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
620                             0xea,
621                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
622                             0x0307, 0x0308, 0x0309, 0x030a,
623                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
624                             0x0327, 0x0328, 0x0329, 0x032a,
625                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
626                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
627 
628   int count = 0;
629 
630   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
631     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
632   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
633     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
634   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
635     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
636 
637   if (U_FAILURE(status))
638     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
639 
640   while (count < 4)
641   {
642     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
643     if (U_FAILURE(status)) {
644       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
645       break;
646     }
647     else {
648       if (result[count] != fcdresult) {
649         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
650                  result[count]);
651       }
652     }
653     count ++;
654   }
655 
656   /* random checks of long strings */
657   status = U_ZERO_ERROR;
658   srand((unsigned)time( NULL ));
659 
660   for (count = 0; count < 50; count ++)
661   {
662     int size = 0;
663     UBool testresult = UNORM_YES;
664     UChar data[20];
665     UChar norm[100];
666     UChar nfd[100];
667     int normsize = 0;
668     int nfdsize = 0;
669 
670     while (size != 19) {
671       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
672       log_verbose("0x%x", data[size]);
673       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
674                                   norm + normsize, 100 - normsize, &status);
675       if (U_FAILURE(status)) {
676         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
677         break;
678       }
679       size ++;
680     }
681     log_verbose("\n");
682 
683     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
684                               nfd, 100, &status);
685     if (U_FAILURE(status)) {
686       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
687     }
688 
689     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
690       testresult = UNORM_NO;
691     }
692     if (testresult == UNORM_YES) {
693       log_verbose("result UNORM_YES\n");
694     }
695     else {
696       log_verbose("result UNORM_NO\n");
697     }
698 
699     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
700       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
701     }
702   }
703 }
704 
705 static void
TestAPI()706 TestAPI() {
707     static const UChar in[]={ 0x68, 0xe4 };
708     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
709     UErrorCode errorCode;
710     int32_t length;
711 
712     /* try preflighting */
713     errorCode=U_ZERO_ERROR;
714     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
715     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
716         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
717         return;
718     }
719 
720     errorCode=U_ZERO_ERROR;
721     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
722     if(U_FAILURE(errorCode)) {
723         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
724         return;
725     }
726     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
727         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
728         return;
729     }
730     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
731     if(U_FAILURE(errorCode)) {
732         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
733         return;
734     }
735     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
736     if(U_FAILURE(errorCode)) {
737         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
738         return;
739     }
740 }
741 
742 /* test cases to improve test code coverage */
/* Code points used to build the coverage test input. */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315D,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* Hangul conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116F,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11AA,  /* Jamo T U+11aa */

    /* Hangul syllables */
    HANGUL_AC00          = 0xAC00,  /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = HANGUL_AC00 + 14 * 28 + 3,  /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* Musical symbols (supplementary code points) */
    MUSICAL_VOID_NOTEHEAD = 0x1D157,
    MUSICAL_HALF_NOTE     = 0x1D15E,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1D165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1D17C   /* cc=220 */
};
760 
761 static void
TestNormCoverage()762 TestNormCoverage() {
763     UChar input[1000], expect[1000], output[1000];
764     UErrorCode errorCode;
765     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
766 
767     /* create a long and nasty string with NFKC-unsafe characters */
768     inLength=0;
769 
770     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
771     input[inLength++]=HANGUL_KIYEOK;
772     input[inLength++]=HANGUL_WEO;
773     input[inLength++]=HANGUL_KIYEOK_SIOS;
774 
775     input[inLength++]=HANGUL_KIYEOK;
776     input[inLength++]=HANGUL_WEO;
777     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
778 
779     input[inLength++]=HANGUL_KIYEOK;
780     input[inLength++]=HANGUL_K_WEO;
781     input[inLength++]=HANGUL_KIYEOK_SIOS;
782 
783     input[inLength++]=HANGUL_KIYEOK;
784     input[inLength++]=HANGUL_K_WEO;
785     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
786 
787     input[inLength++]=HANGUL_K_KIYEOK;
788     input[inLength++]=HANGUL_WEO;
789     input[inLength++]=HANGUL_KIYEOK_SIOS;
790 
791     input[inLength++]=HANGUL_K_KIYEOK;
792     input[inLength++]=HANGUL_WEO;
793     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
794 
795     input[inLength++]=HANGUL_K_KIYEOK;
796     input[inLength++]=HANGUL_K_WEO;
797     input[inLength++]=HANGUL_KIYEOK_SIOS;
798 
799     input[inLength++]=HANGUL_K_KIYEOK;
800     input[inLength++]=HANGUL_K_WEO;
801     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
802 
803     /* Hangul LV with normal/compatibility Jamo T */
804     input[inLength++]=HANGUL_AC00;
805     input[inLength++]=HANGUL_KIYEOK_SIOS;
806 
807     input[inLength++]=HANGUL_AC00;
808     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
809 
810     /* compatibility Jamo L, V */
811     input[inLength++]=HANGUL_K_KIYEOK;
812     input[inLength++]=HANGUL_K_WEO;
813 
814     hangulPrefixLength=inLength;
815 
816     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
817     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
818     for(i=0; i<200; ++i) {
819         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
820         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
821         input[inLength++]=U16_LEAD(MUSICAL_STEM);
822         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
823     }
824 
825     /* (compatibility) Jamo L, T do not compose */
826     input[inLength++]=HANGUL_K_KIYEOK;
827     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
828 
829     /* quick checks */
830     errorCode=U_ZERO_ERROR;
831     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
832         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
833     }
834     errorCode=U_ZERO_ERROR;
835     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
836         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
837     }
838     errorCode=U_ZERO_ERROR;
839     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
840         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
841     }
842     errorCode=U_ZERO_ERROR;
843     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
844         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
845     }
846     errorCode=U_ZERO_ERROR;
847     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
848         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
849     }
850 
851     /* NFKC */
852     expectLength=0;
853     expect[expectLength++]=HANGUL_SYLLABLE;
854 
855     expect[expectLength++]=HANGUL_SYLLABLE;
856 
857     expect[expectLength++]=HANGUL_SYLLABLE;
858 
859     expect[expectLength++]=HANGUL_SYLLABLE;
860 
861     expect[expectLength++]=HANGUL_SYLLABLE;
862 
863     expect[expectLength++]=HANGUL_SYLLABLE;
864 
865     expect[expectLength++]=HANGUL_SYLLABLE;
866 
867     expect[expectLength++]=HANGUL_SYLLABLE;
868 
869     expect[expectLength++]=HANGUL_AC00+3;
870 
871     expect[expectLength++]=HANGUL_AC00+3;
872 
873     expect[expectLength++]=HANGUL_AC00+14*28;
874 
875     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
876     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
877     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
878     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
879     for(i=0; i<200; ++i) {
880         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
881         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
882     }
883     for(i=0; i<200; ++i) {
884         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
885         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
886     }
887 
888     expect[expectLength++]=HANGUL_KIYEOK;
889     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
890 
891     /* try destination overflow first */
892     errorCode=U_ZERO_ERROR;
893     preflightLength=unorm_normalize(input, inLength,
894                            UNORM_NFKC, 0,
895                            output, 100, /* too short */
896                            &errorCode);
897     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
898         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
899     }
900 
901     /* real NFKC */
902     errorCode=U_ZERO_ERROR;
903     length=unorm_normalize(input, inLength,
904                            UNORM_NFKC, 0,
905                            output, UPRV_LENGTHOF(output),
906                            &errorCode);
907     if(U_FAILURE(errorCode)) {
908         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
909     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
910         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
911         for(i=0; i<length; ++i) {
912             if(output[i]!=expect[i]) {
913                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
914                 break;
915             }
916         }
917     }
918     if(length!=preflightLength) {
919         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
920     }
921 
922     /* FCD */
923     u_memcpy(expect, input, hangulPrefixLength);
924     expectLength=hangulPrefixLength;
925 
926     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
927     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
928     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
929     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
930     for(i=0; i<200; ++i) {
931         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
932         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
933     }
934     for(i=0; i<200; ++i) {
935         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
936         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
937     }
938 
939     expect[expectLength++]=HANGUL_K_KIYEOK;
940     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
941 
942     errorCode=U_ZERO_ERROR;
943     length=unorm_normalize(input, inLength,
944                            UNORM_FCD, 0,
945                            output, UPRV_LENGTHOF(output),
946                            &errorCode);
947     if(U_FAILURE(errorCode)) {
948         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
949     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
950         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
951         for(i=0; i<length; ++i) {
952             if(output[i]!=expect[i]) {
953                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
954                 break;
955             }
956         }
957     }
958 }
959 
960 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
961 static void
TestConcatenate(void)962 TestConcatenate(void) {
963     /* "re + 'sume'" */
964     static const UChar
965     left[]={
966         0x72, 0x65, 0
967     },
968     right[]={
969         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
970     },
971     expect[]={
972         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
973     };
974 
975     UChar buffer[100];
976     UErrorCode errorCode;
977     int32_t length;
978 
979     /* left with length, right NUL-terminated */
980     errorCode=U_ZERO_ERROR;
981     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
982     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
983         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
984     }
985 
986     /* preflighting */
987     errorCode=U_ZERO_ERROR;
988     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
989     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
990         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
991     }
992 
993     buffer[2]=0x5555;
994     errorCode=U_ZERO_ERROR;
995     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
996     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
997         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
998     }
999 
1000     /* enter with U_FAILURE */
1001     buffer[2]=0xaaaa;
1002     errorCode=U_UNEXPECTED_TOKEN;
1003     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1004     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1005         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1006     }
1007 
1008     /* illegal arguments */
1009     buffer[2]=0xaaaa;
1010     errorCode=U_ZERO_ERROR;
1011     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1012     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1013         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1014     }
1015 
1016     errorCode=U_ZERO_ERROR;
1017     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1018     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1019         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1020     }
1021 }
1022 
/*
 * Code unit 0x2b used as the separator between expected output pieces
 * in the arrays passed to _testIter().
 */
enum {
    _PLUS=0x2b
};
1026 
1027 static const char *const _modeString[UNORM_MODE_COUNT]={
1028     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1029 };
1030 
1031 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1032 _testIter(const UChar *src, int32_t srcLength,
1033           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1034           const UChar *out, int32_t outLength,
1035           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1036     UChar buffer[4];
1037     const UChar *expect, *outLimit, *in;
1038     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1039     UErrorCode errorCode;
1040     UBool neededToNormalize, expectNeeded;
1041 
1042     errorCode=U_ZERO_ERROR;
1043     outLimit=out+outLength;
1044     if(forward) {
1045         expect=out;
1046         i=index=0;
1047     } else {
1048         expect=outLimit;
1049         i=srcIndexesLength-2;
1050         index=srcLength;
1051     }
1052 
1053     for(;;) {
1054         prevIndex=index;
1055         if(forward) {
1056             if(!iter->hasNext(iter)) {
1057                 return;
1058             }
1059             length=unorm_next(iter,
1060                               buffer, UPRV_LENGTHOF(buffer),
1061                               mode, 0,
1062                               (UBool)(out!=NULL), &neededToNormalize,
1063                               &errorCode);
1064             expectIndex=srcIndexes[i+1];
1065             in=src+prevIndex;
1066             inLength=expectIndex-prevIndex;
1067 
1068             if(out!=NULL) {
1069                 /* get output piece from between plus signs */
1070                 expectLength=0;
1071                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1072                     ++expectLength;
1073                 }
1074                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1075             } else {
1076                 expect=in;
1077                 expectLength=inLength;
1078                 expectNeeded=FALSE;
1079             }
1080         } else {
1081             if(!iter->hasPrevious(iter)) {
1082                 return;
1083             }
1084             length=unorm_previous(iter,
1085                                   buffer, UPRV_LENGTHOF(buffer),
1086                                   mode, 0,
1087                                   (UBool)(out!=NULL), &neededToNormalize,
1088                                   &errorCode);
1089             expectIndex=srcIndexes[i];
1090             in=src+expectIndex;
1091             inLength=prevIndex-expectIndex;
1092 
1093             if(out!=NULL) {
1094                 /* get output piece from between plus signs */
1095                 expectLength=0;
1096                 while(expect!=out && expect[-1]!=_PLUS) {
1097                     ++expectLength;
1098                     --expect;
1099                 }
1100                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1101             } else {
1102                 expect=in;
1103                 expectLength=inLength;
1104                 expectNeeded=FALSE;
1105             }
1106         }
1107         index=iter->getIndex(iter, UITER_CURRENT);
1108 
1109         if(U_FAILURE(errorCode)) {
1110             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1111                     forward, _modeString[mode], i, u_errorName(errorCode));
1112             return;
1113         }
1114         if(expectIndex!=index) {
1115             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1116                     forward, _modeString[mode], i, index, expectIndex);
1117             return;
1118         }
1119         if(expectLength!=length) {
1120             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1121                     forward, _modeString[mode], i, length, expectLength);
1122             return;
1123         }
1124         if(0!=u_memcmp(expect, buffer, length)) {
1125             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1126                     forward, _modeString[mode], i);
1127             return;
1128         }
1129         if(neededToNormalize!=expectNeeded) {
1130         }
1131 
1132         if(forward) {
1133             expect+=expectLength+1; /* go after the + */
1134             ++i;
1135         } else {
1136             --expect; /* go before the + */
1137             --i;
1138         }
1139     }
1140 }
1141 
1142 static void
TestNextPrevious()1143 TestNextPrevious() {
1144     static const UChar
1145     src[]={ /* input string */
1146         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1147     },
1148     nfd[]={ /* + separates expected output pieces */
1149         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1150     },
1151     nfkd[]={
1152         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1153     },
1154     nfc[]={
1155         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1156     },
1157     nfkc[]={
1158         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1159     },
1160     fcd[]={
1161         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1162     };
1163 
1164     /* expected iterator indexes in the source string for each iteration piece */
1165     static const int32_t
1166     nfdIndexes[]={
1167         0, 1, 2, 5, 6, 7
1168     },
1169     nfkdIndexes[]={
1170         0, 1, 2, 5, 6, 7
1171     },
1172     nfcIndexes[]={
1173         0, 1, 2, 5, 6, 7
1174     },
1175     nfkcIndexes[]={
1176         0, 1, 2, 5, 7
1177     },
1178     fcdIndexes[]={
1179         0, 1, 2, 5, 6, 7
1180     };
1181 
1182     UCharIterator iter;
1183 
1184     UChar buffer[4];
1185     int32_t length;
1186 
1187     UBool neededToNormalize;
1188     UErrorCode errorCode;
1189 
1190     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1191 
1192     /* test iteration with doNormalize */
1193     iter.index=0;
1194     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1195     iter.index=0;
1196     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1197     iter.index=0;
1198     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1199     iter.index=0;
1200     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1201     iter.index=0;
1202     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1203 
1204     iter.index=iter.length;
1205     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1206     iter.index=iter.length;
1207     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1208     iter.index=iter.length;
1209     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1210     iter.index=iter.length;
1211     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1212     iter.index=iter.length;
1213     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1214 
1215     /* test iteration without doNormalize */
1216     iter.index=0;
1217     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1218     iter.index=0;
1219     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1220     iter.index=0;
1221     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1222     iter.index=0;
1223     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1224     iter.index=0;
1225     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1226 
1227     iter.index=iter.length;
1228     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1229     iter.index=iter.length;
1230     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1231     iter.index=iter.length;
1232     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1233     iter.index=iter.length;
1234     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1235     iter.index=iter.length;
1236     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1237 
1238     /* try without neededToNormalize */
1239     errorCode=U_ZERO_ERROR;
1240     buffer[0]=5;
1241     iter.index=1;
1242     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1243                       UNORM_NFD, 0, TRUE, NULL,
1244                       &errorCode);
1245     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1246         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1247         return;
1248     }
1249 
1250     /* preflight */
1251     neededToNormalize=9;
1252     iter.index=1;
1253     length=unorm_next(&iter, NULL, 0,
1254                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1255                       &errorCode);
1256     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1257         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1258         return;
1259     }
1260 
1261     errorCode=U_ZERO_ERROR;
1262     buffer[0]=buffer[1]=5;
1263     neededToNormalize=9;
1264     iter.index=1;
1265     length=unorm_next(&iter, buffer, 1,
1266                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1267                       &errorCode);
1268     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1269         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1270         return;
1271     }
1272 
1273     /* no iterator */
1274     errorCode=U_ZERO_ERROR;
1275     buffer[0]=buffer[1]=5;
1276     neededToNormalize=9;
1277     iter.index=1;
1278     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1279                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1280                       &errorCode);
1281     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1282         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1283         return;
1284     }
1285 
1286     /* illegal mode */
1287     buffer[0]=buffer[1]=5;
1288     neededToNormalize=9;
1289     iter.index=1;
1290     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1291                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1292                       &errorCode);
1293     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1294         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1295         return;
1296     }
1297 
1298     /* error coming in */
1299     errorCode=U_MISPLACED_QUANTIFIER;
1300     buffer[0]=5;
1301     iter.index=1;
1302     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1303                       UNORM_NFD, 0, TRUE, NULL,
1304                       &errorCode);
1305     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1306         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1307         return;
1308     }
1309 }
1310 
1311 static void
TestFCNFKCClosure(void)1312 TestFCNFKCClosure(void) {
1313     static const struct {
1314         UChar32 c;
1315         const UChar s[6];
1316     } tests[]={
1317         { 0x00C4, { 0 } },
1318         { 0x00E4, { 0 } },
1319         { 0x037A, { 0x0020, 0x03B9, 0 } },
1320         { 0x03D2, { 0x03C5, 0 } },
1321         { 0x20A8, { 0x0072, 0x0073, 0 } },
1322         { 0x210B, { 0x0068, 0 } },
1323         { 0x210C, { 0x0068, 0 } },
1324         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1325         { 0x2122, { 0x0074, 0x006D, 0 } },
1326         { 0x2128, { 0x007A, 0 } },
1327         { 0x1D5DB, { 0x0068, 0 } },
1328         { 0x1D5ED, { 0x007A, 0 } },
1329         { 0x0061, { 0 } }
1330     };
1331 
1332     UChar buffer[8];
1333     UErrorCode errorCode;
1334     int32_t i, length;
1335 
1336     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1337         errorCode=U_ZERO_ERROR;
1338         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1339         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1340             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1341         }
1342     }
1343 
1344     /* error handling */
1345     errorCode=U_ZERO_ERROR;
1346     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1347     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1348         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1349     }
1350 
1351     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1352     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1353         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1354     }
1355 }
1356 
1357 static void
TestQuickCheckPerCP()1358 TestQuickCheckPerCP() {
1359     UErrorCode errorCode;
1360     UChar32 c, lead, trail;
1361     UChar s[U16_MAX_LENGTH], nfd[16];
1362     int32_t length, lccc1, lccc2, tccc1, tccc2;
1363     int32_t qc1, qc2;
1364 
1365     if(
1366         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1371         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1372     ) {
1373         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1374     }
1375 
1376     /*
1377      * compare the quick check property values for some code points
1378      * to the quick check results for checking same-code point strings
1379      */
1380     errorCode=U_ZERO_ERROR;
1381     c=0;
1382     while(c<0x110000) {
1383         length=0;
1384         U16_APPEND_UNSAFE(s, length, c);
1385 
1386         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1387         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1388         if(qc1!=qc2) {
1389             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1390         }
1391 
1392         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1393         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1394         if(qc1!=qc2) {
1395             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1396         }
1397 
1398         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1399         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1400         if(qc1!=qc2) {
1401             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1402         }
1403 
1404         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1405         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1406         if(qc1!=qc2) {
1407             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1408         }
1409 
1410         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1411         /* length-length == 0 is used to get around a compiler warning. */
1412         U16_GET(nfd, 0, length-length, length, lead);
1413         U16_GET(nfd, 0, length-1, length, trail);
1414 
1415         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1416         lccc2=u_getCombiningClass(lead);
1417         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1418         tccc2=u_getCombiningClass(trail);
1419 
1420         if(lccc1!=lccc2) {
1421             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1422                     lccc1, lccc2, c);
1423         }
1424         if(tccc1!=tccc2) {
1425             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1426                     tccc1, tccc2, c);
1427         }
1428 
1429         /* skip some code points */
1430         c=(20*c)/19+1;
1431     }
1432 }
1433 
1434 static void
TestComposition(void)1435 TestComposition(void) {
1436     static const struct {
1437         UNormalizationMode mode;
1438         uint32_t options;
1439         UChar input[12];
1440         UChar expect[12];
1441     } cases[]={
1442         /*
1443          * special cases for UAX #15 bug
1444          * see Unicode Corrigendum #5: Normalization Idempotency
1445          * at http://unicode.org/versions/corrigendum5.html
1446          * (was Public Review Issue #29)
1447          */
1448         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1449         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1450         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1451         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1452 
1453         /* TODO: add test cases for UNORM_FCC here (j2151) */
1454     };
1455 
1456     UChar output[16];
1457     UErrorCode errorCode;
1458     int32_t i, length;
1459 
1460     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1461         errorCode=U_ZERO_ERROR;
1462         length=unorm_normalize(
1463                     cases[i].input, -1,
1464                     cases[i].mode, cases[i].options,
1465                     output, UPRV_LENGTHOF(output),
1466                     &errorCode);
1467         if( U_FAILURE(errorCode) ||
1468             length!=u_strlen(cases[i].expect) ||
1469             0!=u_memcmp(output, cases[i].expect, length)
1470         ) {
1471             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1472         }
1473     }
1474 }
1475 
1476 static void
TestGetDecomposition()1477 TestGetDecomposition() {
1478     UChar decomp[32];
1479     int32_t length;
1480 
1481     UErrorCode errorCode=U_ZERO_ERROR;
1482     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1483     if(U_FAILURE(errorCode)) {
1484         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1485         return;
1486     }
1487 
1488     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1489     if(U_FAILURE(errorCode) || length>=0) {
1490         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1491     }
1492     errorCode=U_ZERO_ERROR;
1493     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1494     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1495         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1496     }
1497     errorCode=U_ZERO_ERROR;
1498     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1500         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1501     }
1502     errorCode=U_ZERO_ERROR;
1503     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1504     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1505         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1506     }
1507     errorCode=U_ZERO_ERROR;
1508     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1509     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1510         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1511     }
1512     errorCode=U_ZERO_ERROR;
1513     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1514     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1515         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1516     }
1517 }
1518 
1519 static void
TestGetRawDecomposition()1520 TestGetRawDecomposition() {
1521     UChar decomp[32];
1522     int32_t length;
1523 
1524     UErrorCode errorCode=U_ZERO_ERROR;
1525     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1526     if(U_FAILURE(errorCode)) {
1527         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1528         return;
1529     }
1530     /*
1531      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1532      * without recursive decomposition.
1533      */
1534 
1535     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1536     if(U_FAILURE(errorCode) || length>=0) {
1537         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1538     }
1539     errorCode=U_ZERO_ERROR;
1540     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1541     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1542         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1543     }
1544     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1545     errorCode=U_ZERO_ERROR;
1546     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1547     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1548         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1549     }
1550     /* U+212B ANGSTROM SIGN */
1551     errorCode=U_ZERO_ERROR;
1552     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1553     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1554         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1555     }
1556     errorCode=U_ZERO_ERROR;
1557     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1558     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1559         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1560     }
1561     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1562     errorCode=U_ZERO_ERROR;
1563     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1564     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1565         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1566     }
1567     errorCode=U_ZERO_ERROR;
1568     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1569     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1570         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1571     }
1572     errorCode=U_ZERO_ERROR;
1573     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1574     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1575         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1576     }
1577     errorCode=U_ZERO_ERROR;
1578     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1579     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1580         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1581     }
1582 }
1583 
1584 static void
TestAppendRestoreMiddle()1585 TestAppendRestoreMiddle() {
1586     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1587     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1588     /* NFC: C5 is 'A with ring above' */
1589     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1590     int32_t length;
1591     UErrorCode errorCode=U_ZERO_ERROR;
1592     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1593     if(U_FAILURE(errorCode)) {
1594         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1595         return;
1596     }
1597     /*
1598      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1599      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1600      * still fits into a[] but the full result still overflows this capacity.
1601      * (Let it modify the destination buffer before reallocating internally.)
1602      */
1603     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1604     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1605         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1606         return;
1607     }
1608     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1609     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1610         log_err("unorm2_append(overflow) modified the first string\n");
1611         return;
1612     }
1613     errorCode=U_ZERO_ERROR;
1614     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1615     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1616         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1617         return;
1618     }
1619 }
1620 
1621 static void
TestGetEasyToUseInstance()1622 TestGetEasyToUseInstance() {
1623     static const UChar in[]={
1624         0xA0,  /* -> <noBreak> 0020 */
1625         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1626     };
1627     UChar out[32];
1628     int32_t length;
1629 
1630     UErrorCode errorCode=U_ZERO_ERROR;
1631     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1632     if(U_FAILURE(errorCode)) {
1633         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1634         return;
1635     }
1636     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1637     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1638         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1639                 (int)length, u_errorName(errorCode));
1640     }
1641 
1642     errorCode=U_ZERO_ERROR;
1643     n2=unorm2_getNFDInstance(&errorCode);
1644     if(U_FAILURE(errorCode)) {
1645         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1646         return;
1647     }
1648     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1649     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1650         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1651                 (int)length, u_errorName(errorCode));
1652     }
1653 
1654     errorCode=U_ZERO_ERROR;
1655     n2=unorm2_getNFKCInstance(&errorCode);
1656     if(U_FAILURE(errorCode)) {
1657         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1658         return;
1659     }
1660     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1661     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1662         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1663                 (int)length, u_errorName(errorCode));
1664     }
1665 
1666     errorCode=U_ZERO_ERROR;
1667     n2=unorm2_getNFKDInstance(&errorCode);
1668     if(U_FAILURE(errorCode)) {
1669         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1670         return;
1671     }
1672     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1673     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1674         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1675                 (int)length, u_errorName(errorCode));
1676     }
1677 
1678     errorCode=U_ZERO_ERROR;
1679     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1680     if(U_FAILURE(errorCode)) {
1681         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1682         return;
1683     }
1684     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1685     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1686         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1687                 (int)length, u_errorName(errorCode));
1688     }
1689 }
1690 
1691 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1692