• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File CNORMTST.C
9 *
10 * Modification History:
11 *        Name                     Description
12 *     Madhu Katragadda            Ported for C API
13 *     synwee                      added test for quick check
14 *     synwee                      added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "cintltst.h"
20 
21 #if UCONFIG_NO_NORMALIZATION
22 
/* Built with UCONFIG_NO_NORMALIZATION: there are no normalization tests to register. */
void addNormTest(TestNode** root)
{
    /* no normalization - nothing to do */
    (void)root;
}
26 
27 #else
28 
29 #include <stdlib.h>
30 #include <time.h>
31 #include "unicode/uchar.h"
32 #include "unicode/ustring.h"
33 #include "unicode/unorm.h"
34 #include "cnormtst.h"
35 
36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof ((array)[0]))
37 
/* Forward declarations of file-local test functions registered by addNormTest(). */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
66 
/*
 * Canonical normalization cases.
 * Column 0 is the input, column 1 the expected canonical decomposition and
 * column 2 the expected canonical composition.  The strings are written with
 * literal "\uXXXX" escape sequences (note the doubled backslash).
 */
static const char* const canonTests[][3] = {
    /* plain ASCII is left untouched */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D with dot above */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },   /* precomposed D-dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },  /* D + dot_above */

    /* D with dot below and dot above, in several input spellings */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_below + dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D + dot_above + dot_below */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla + dot_above + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D + dot_above + ogonek + dot_below */

    /* E with macron and grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },               /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },        /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron */

    { "\\u212b", "A\\u030a", "\\u00c5" },   /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" },   /* A-ring */

    /* the ffi ligature U+FB03 is kept by the canonical forms */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* U+2163 is kept by the canonical forms */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* Katakana */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },                 /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },          /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },   /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },   /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },   /* hw_ka + ten */

    /* A + U+0300 + U+0316: the two marks are reordered */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    /* empty string */
    { "", "", "" }
};
103 
/*
 * Compatibility normalization cases.
 * Column 0 is the input, column 1 the expected compatibility decomposition
 * and column 2 the expected compatibility composition.  The strings are
 * written with literal "\uXXXX" escape sequences (note the doubled backslash).
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },  /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },  /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },         /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },  /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },  /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },  /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },  /* ka + hw_ten */

    /* empty string */
    { "", "", "" }
};
126 
/*
 * FCD cases: column 0 is the input and column 1 the expected result.
 * Column 2 is NULL in every row.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
132 
133 void addNormTest(TestNode** root);
134 
/*
 * Registers every normalization test of this file under the
 * "tsnorm/cnormtst/" path prefix of the test tree rooted at *root.
 */
void addNormTest(TestNode** root)
{
    addTest(root, TestAPI,                 "tsnorm/cnormtst/TestAPI");
    addTest(root, TestDecomp,              "tsnorm/cnormtst/TestDecomp");
    addTest(root, TestCompatDecomp,        "tsnorm/cnormtst/TestCompatDecomp");
    addTest(root, TestCanonDecompCompose,  "tsnorm/cnormtst/TestCanonDecompCompose");
    addTest(root, TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
    addTest(root, TestFCD,                 "tsnorm/cnormtst/TestFCD");
    addTest(root, TestNull,                "tsnorm/cnormtst/TestNull");
    addTest(root, TestQuickCheck,          "tsnorm/cnormtst/TestQuickCheck");
    addTest(root, TestQuickCheckPerCP,     "tsnorm/cnormtst/TestQuickCheckPerCP");
    addTest(root, TestIsNormalized,        "tsnorm/cnormtst/TestIsNormalized");
    addTest(root, TestCheckFCD,            "tsnorm/cnormtst/TestCheckFCD");
    addTest(root, TestNormCoverage,        "tsnorm/cnormtst/TestNormCoverage");
    addTest(root, TestConcatenate,         "tsnorm/cnormtst/TestConcatenate");
    addTest(root, TestNextPrevious,        "tsnorm/cnormtst/TestNextPrevious");
    addTest(root, TestFCNFKCClosure,       "tsnorm/cnormtst/TestFCNFKCClosure");
    addTest(root, TestComposition,         "tsnorm/cnormtst/TestComposition");
    addTest(root, TestGetDecomposition,    "tsnorm/cnormtst/TestGetDecomposition");
}
155 
/*
 * Printable names of the UNormalizationMode constants, indexed directly by
 * the enum value.  The enum starts at UNORM_NONE == 1, so index 0 holds a
 * placeholder; without it every log message names the following mode.
 */
static const char* const modeStrings[]={
    "?",                /* 0: not a valid UNormalizationMode */
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
165 
/*
 * Runs unorm_normalize() in the given mode over every row of a test table.
 *
 * mode           normalization mode under test
 * cases          table of escaped strings; column 0 is the input, the
 *                expected result is column 2 for NFC/NFKC and column 1 for
 *                every other mode
 * lengthOfCases  number of rows in cases
 *
 * Each input is preflighted and normalized twice: once with an explicit
 * source length ("srcLength") and once as a NUL-terminated string ("NUL",
 * source length -1).  Both variants must agree on the output length and
 * must produce the expected string.
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    int32_t x, neededLen, length2;
    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar *source=NULL;
    UChar result[16];
    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(x=0; x < lengthOfCases; x++)
    {
        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
        source=CharsToUChars(cases[x][0]);

        /* preflight with an explicit length, then as a NUL-terminated string */
        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen!=length2) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }
        /* an overflow is the expected outcome of preflighting */
        if(status==U_BUFFER_OVERFLOW_ERROR)
        {
            status=U_ZERO_ERROR;
        }

        /* explicit source length */
        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }

        /* NUL-terminated source; start from a clean status so that a failure
         * above is not reported a second time for this variant */
        status=U_ZERO_ERROR;
        length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }
        free(source);
    }
}
205 
TestDecomp()206 void TestDecomp() {
207     TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests));
208 }
209 
TestCompatDecomp()210 void TestCompatDecomp() {
211     TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests));
212 }
213 
TestCanonDecompCompose()214 void TestCanonDecompCompose() {
215     TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests));
216 }
217 
TestCompatDecompCompose()218 void TestCompatDecompCompose() {
219     TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests));
220 }
221 
TestFCD()222 void TestFCD() {
223     TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests));
224 }
225 
/*
 * Compares a normalization result with its expected value and logs an
 * error when they differ.
 *
 * result    NUL-terminated UTF-16 string that was produced
 * expected  expected string in escaped char* form; it is converted with
 *           CharsToUChars() before the comparison
 * index     row number of the test case, used only in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *wanted = CharsToUChars(expected);
    int differs = (u_strcmp(result, wanted) != 0);

    if(differs) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
            austrdup(result) );
    }

    free(wanted);
}
235 
/*
 * Normalizes src (which may contain embedded U+0000) with an explicit length
 * and compares the output with exp.
 *
 * src, srcLen  input string and its length
 * exp, expLen  expected output and its length
 * mode         normalization mode to apply
 * name         printable name of the mode, used in log messages
 *
 * On a failed call nothing is compared, because the reported length may
 * exceed the output buffer.  On a length mismatch only the code units that
 * exist in both the output and exp are compared, so neither array is read
 * past its end, and the final "OK" message is suppressed.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    enum { RESULT_CAPACITY = 50 };

    UErrorCode status = U_ZERO_ERROR;
    int32_t len, compareLen, i;
    int lengthMismatch = 0;

    UChar   result[RESULT_CAPACITY];

    /* pre-fill so that stale buffer contents never look like valid output */
    for(i=0;i<RESULT_CAPACITY;i++)
      {
        result[i] = 0xFFFD;
      }

    len = unorm_normalize(src, srcLen, mode, 0, result, RESULT_CAPACITY, &status);

    if(U_FAILURE(status)) {
      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
      return;
    }

    compareLen = len;
    if (len != expLen) {
      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
      lengthMismatch = 1;
      if(expLen < compareLen) {
        compareLen = expLen;
      }
    }

    for(i=0;i<compareLen;i++){
      if(exp[i] != result[i]) {
        log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                name,
                i,
                exp[i],
                result[i]);
        return;
      }
      log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    if(!lengthMismatch) {
      log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
278 
TestNull()279 void TestNull()
280 {
281 
282     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
283     int32_t source_comp_len = 4;
284     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
285     int32_t expect_comp_len = 3;
286 
287     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
288     int32_t source_dcmp_len = 3;
289     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
290     int32_t expect_dcmp_len = 5;
291 
292     TestNull_check(source_comp,
293                    source_comp_len,
294                    expect_comp,
295                    expect_comp_len,
296                    UNORM_NFC,
297                    "UNORM_NFC");
298 
299     TestNull_check(source_dcmp,
300                    source_dcmp_len,
301                    expect_dcmp,
302                    expect_dcmp_len,
303                    UNORM_NFD,
304                    "UNORM_NFD");
305 
306     TestNull_check(source_comp,
307                    source_comp_len,
308                    expect_comp,
309                    expect_comp_len,
310                    UNORM_NFKC,
311                    "UNORM_NFKC");
312 
313 
314 }
315 
TestQuickCheckResultNO()316 static void TestQuickCheckResultNO()
317 {
318   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
319                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
320   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
321                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
322   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
323                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
324   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
325                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
326 
327 
328   const int SIZE = 10;
329 
330   int count = 0;
331   UErrorCode error = U_ZERO_ERROR;
332 
333   for (; count < SIZE; count ++)
334   {
335     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
336                                                               UNORM_NO)
337     {
338       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
339       return;
340     }
341     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
342                                                               UNORM_NO)
343     {
344       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
345       return;
346     }
347     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
348                                                               UNORM_NO)
349     {
350       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
351       return;
352     }
353     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
354                                                               UNORM_NO)
355     {
356       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
357       return;
358     }
359   }
360 }
361 
362 
TestQuickCheckResultYES()363 static void TestQuickCheckResultYES()
364 {
365   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
366                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
367   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
368                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
369   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
370                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
371   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
372                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
373 
374   const int SIZE = 10;
375   int count = 0;
376   UErrorCode error = U_ZERO_ERROR;
377 
378   UChar cp = 0;
379   while (cp < 0xA0)
380   {
381     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
382     {
383       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
384       return;
385     }
386     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
387                                                              UNORM_YES)
388     {
389       log_err("ERROR in NFC quick check at U+%04x\n", cp);
390       return;
391     }
392     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
393     {
394       log_err("ERROR in NFKD quick check at U+%04x\n", cp);
395       return;
396     }
397     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
398                                                              UNORM_YES)
399     {
400       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
401       return;
402     }
403     cp ++;
404   }
405 
406   for (; count < SIZE; count ++)
407   {
408     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
409                                                              UNORM_YES)
410     {
411       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
412       return;
413     }
414     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
415                                                           != UNORM_YES)
416     {
417       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
418       return;
419     }
420     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
421                                                              UNORM_YES)
422     {
423       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
424       return;
425     }
426     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
427                                                              UNORM_YES)
428     {
429       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
430       return;
431     }
432   }
433 }
434 
TestQuickCheckResultMAYBE()435 static void TestQuickCheckResultMAYBE()
436 {
437   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
438                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
439   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
440                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
441 
442 
443   const int SIZE = 10;
444 
445   int count = 0;
446   UErrorCode error = U_ZERO_ERROR;
447 
448   /* NFD and NFKD does not have any MAYBE codepoints */
449   for (; count < SIZE; count ++)
450   {
451     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
452                                                            UNORM_MAYBE)
453     {
454       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
455       return;
456     }
457     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
458                                                            UNORM_MAYBE)
459     {
460       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
461       return;
462     }
463   }
464 }
465 
TestQuickCheckStringResult()466 static void TestQuickCheckStringResult()
467 {
468   int count;
469   UChar *d = NULL;
470   UChar *c = NULL;
471   UErrorCode error = U_ZERO_ERROR;
472 
473   for (count = 0; count < LENGTHOF(canonTests); count ++)
474   {
475     d = CharsToUChars(canonTests[count][1]);
476     c = CharsToUChars(canonTests[count][2]);
477     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
478                                                             UNORM_YES)
479     {
480       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
481       return;
482     }
483 
484     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
485                                                             UNORM_NO)
486     {
487       log_err("ERROR in NFC quick check for string at count %d\n", count);
488       return;
489     }
490 
491     free(d);
492     free(c);
493   }
494 
495   for (count = 0; count < LENGTHOF(compatTests); count ++)
496   {
497     d = CharsToUChars(compatTests[count][1]);
498     c = CharsToUChars(compatTests[count][2]);
499     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
500                                                             UNORM_YES)
501     {
502       log_err("ERROR in NFKD quick check for string at count %d\n", count);
503       return;
504     }
505 
506     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
507                                                             UNORM_YES)
508     {
509       log_err("ERROR in NFKC quick check for string at count %d\n", count);
510       return;
511     }
512 
513     free(d);
514     free(c);
515   }
516 }
517 
/*
 * Entry point for the unorm_quickCheck() tests: runs the per-code-unit
 * NO / YES / MAYBE checks followed by the whole-string check.
 */
void TestQuickCheck()
{
  TestQuickCheckResultNO();      /* code units expected to be UNORM_NO */
  TestQuickCheckResultYES();     /* code units expected to be UNORM_YES */
  TestQuickCheckResultMAYBE();   /* code units expected to be UNORM_MAYBE */
  TestQuickCheckStringResult();  /* strings from canonTests and compatTests */
}
525 
526 /*
527  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
528  * normalized, and some that are not.
529  * Here we pick some specific cases and test the C API.
530  */
TestIsNormalized(void)531 static void TestIsNormalized(void) {
532     static const UChar notNFC[][8]={            /* strings that are not in NFC */
533         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
534         { 0xfb1d, 0 },                          /* excluded from composition */
535         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
536         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
537     };
538     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
539         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
540         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
541         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
542     };
543 
544     int32_t i;
545     UErrorCode errorCode;
546 
547     /* API test */
548 
549     /* normal case with length>=0 (length -1 used for special cases below) */
550     errorCode=U_ZERO_ERROR;
551     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
552         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
553     }
554 
555     /* incoming U_FAILURE */
556     errorCode=U_TRUNCATED_CHAR_FOUND;
557     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
558     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
559         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
560     }
561 
562     /* NULL source */
563     errorCode=U_ZERO_ERROR;
564     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
565     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
566         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
567     }
568 
569     /* bad length */
570     errorCode=U_ZERO_ERROR;
571     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
572     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
573         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
574     }
575 
576     /* specific cases */
577     for(i=0; i<LENGTHOF(notNFC); ++i) {
578         errorCode=U_ZERO_ERROR;
579         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
580             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
581         }
582         errorCode=U_ZERO_ERROR;
583         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
584             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
585         }
586     }
587     for(i=0; i<LENGTHOF(notNFKC); ++i) {
588         errorCode=U_ZERO_ERROR;
589         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
590             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591         }
592     }
593 }
594 
TestCheckFCD()595 void TestCheckFCD()
596 {
597   UErrorCode status = U_ZERO_ERROR;
598   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
599                          0x0A};
600   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
601                           0x02B9, 0x0314, 0x0315, 0x0316};
602   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
603                          0x0050, 0x0730, 0x09EE, 0x1E10};
604 
605   static const UChar datastr[][5] =
606   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
607     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
608     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
609     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
610   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
611 
612   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
613                             0x6a,
614                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
615                             0xea,
616                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
617                             0x0307, 0x0308, 0x0309, 0x030a,
618                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
619                             0x0327, 0x0328, 0x0329, 0x032a,
620                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
621                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
622 
623   int count = 0;
624 
625   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
626     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
627   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
628     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
629   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
630     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
631 
632   if (U_FAILURE(status))
633     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
634 
635   while (count < 4)
636   {
637     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
638     if (U_FAILURE(status)) {
639       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
640       break;
641     }
642     else {
643       if (result[count] != fcdresult) {
644         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
645                  result[count]);
646       }
647     }
648     count ++;
649   }
650 
651   /* random checks of long strings */
652   status = U_ZERO_ERROR;
653   srand((unsigned)time( NULL ));
654 
655   for (count = 0; count < 50; count ++)
656   {
657     int size = 0;
658     UBool testresult = UNORM_YES;
659     UChar data[20];
660     UChar norm[100];
661     UChar nfd[100];
662     int normsize = 0;
663     int nfdsize = 0;
664 
665     while (size != 19) {
666       data[size] = datachar[(rand() * 50) / RAND_MAX];
667       log_verbose("0x%x", data[size]);
668       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
669                                   norm + normsize, 100 - normsize, &status);
670       if (U_FAILURE(status)) {
671         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
672         break;
673       }
674       size ++;
675     }
676     log_verbose("\n");
677 
678     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
679                               nfd, 100, &status);
680     if (U_FAILURE(status)) {
681       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
682     }
683 
684     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
685       testresult = UNORM_NO;
686     }
687     if (testresult == UNORM_YES) {
688       log_verbose("result UNORM_YES\n");
689     }
690     else {
691       log_verbose("result UNORM_NO\n");
692     }
693 
694     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
695       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
696     }
697   }
698 }
699 
700 static void
TestAPI()701 TestAPI() {
702     static const UChar in[]={ 0x68, 0xe4 };
703     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
704     UErrorCode errorCode;
705     int32_t length;
706 
707     /* try preflighting */
708     errorCode=U_ZERO_ERROR;
709     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
710     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
711         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
712         return;
713     }
714 
715     errorCode=U_ZERO_ERROR;
716     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
717     if(U_FAILURE(errorCode)) {
718         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
719         return;
720     }
721     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
722         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
723         return;
724     }
725     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
726     if(U_FAILURE(errorCode)) {
727         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
728         return;
729     }
730     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
731     if(U_FAILURE(errorCode)) {
732         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
733         return;
734     }
735 }
736 
737 /* test cases to improve test code coverage */
/* Code points used to build the input of the coverage test. */
enum {
    /* Hangul compatibility Jamo and the Jamo they decompose to under NFKD */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00          = 0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = 0xac00 + 14*28 + 3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c   /* cc=220 */
};
755 
756 static void
TestNormCoverage()757 TestNormCoverage() {
758     UChar input[1000], expect[1000], output[1000];
759     UErrorCode errorCode;
760     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
761 
762     /* create a long and nasty string with NFKC-unsafe characters */
763     inLength=0;
764 
765     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
766     input[inLength++]=HANGUL_KIYEOK;
767     input[inLength++]=HANGUL_WEO;
768     input[inLength++]=HANGUL_KIYEOK_SIOS;
769 
770     input[inLength++]=HANGUL_KIYEOK;
771     input[inLength++]=HANGUL_WEO;
772     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
773 
774     input[inLength++]=HANGUL_KIYEOK;
775     input[inLength++]=HANGUL_K_WEO;
776     input[inLength++]=HANGUL_KIYEOK_SIOS;
777 
778     input[inLength++]=HANGUL_KIYEOK;
779     input[inLength++]=HANGUL_K_WEO;
780     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
781 
782     input[inLength++]=HANGUL_K_KIYEOK;
783     input[inLength++]=HANGUL_WEO;
784     input[inLength++]=HANGUL_KIYEOK_SIOS;
785 
786     input[inLength++]=HANGUL_K_KIYEOK;
787     input[inLength++]=HANGUL_WEO;
788     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
789 
790     input[inLength++]=HANGUL_K_KIYEOK;
791     input[inLength++]=HANGUL_K_WEO;
792     input[inLength++]=HANGUL_KIYEOK_SIOS;
793 
794     input[inLength++]=HANGUL_K_KIYEOK;
795     input[inLength++]=HANGUL_K_WEO;
796     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
797 
798     /* Hangul LV with normal/compatibility Jamo T */
799     input[inLength++]=HANGUL_AC00;
800     input[inLength++]=HANGUL_KIYEOK_SIOS;
801 
802     input[inLength++]=HANGUL_AC00;
803     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
804 
805     /* compatibility Jamo L, V */
806     input[inLength++]=HANGUL_K_KIYEOK;
807     input[inLength++]=HANGUL_K_WEO;
808 
809     hangulPrefixLength=inLength;
810 
811     input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
812     input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
813     for(i=0; i<200; ++i) {
814         input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
815         input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
816         input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
817         input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
818     }
819 
820     /* (compatibility) Jamo L, T do not compose */
821     input[inLength++]=HANGUL_K_KIYEOK;
822     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
823 
824     /* quick checks */
825     errorCode=U_ZERO_ERROR;
826     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
827         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
828     }
829     errorCode=U_ZERO_ERROR;
830     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
831         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
832     }
833     errorCode=U_ZERO_ERROR;
834     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
835         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
836     }
837     errorCode=U_ZERO_ERROR;
838     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
839         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840     }
841     errorCode=U_ZERO_ERROR;
842     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
843         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844     }
845 
846     /* NFKC */
847     expectLength=0;
848     expect[expectLength++]=HANGUL_SYLLABLE;
849 
850     expect[expectLength++]=HANGUL_SYLLABLE;
851 
852     expect[expectLength++]=HANGUL_SYLLABLE;
853 
854     expect[expectLength++]=HANGUL_SYLLABLE;
855 
856     expect[expectLength++]=HANGUL_SYLLABLE;
857 
858     expect[expectLength++]=HANGUL_SYLLABLE;
859 
860     expect[expectLength++]=HANGUL_SYLLABLE;
861 
862     expect[expectLength++]=HANGUL_SYLLABLE;
863 
864     expect[expectLength++]=HANGUL_AC00+3;
865 
866     expect[expectLength++]=HANGUL_AC00+3;
867 
868     expect[expectLength++]=HANGUL_AC00+14*28;
869 
870     expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
871     expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
872     expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
873     expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
874     for(i=0; i<200; ++i) {
875         expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
876         expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
877     }
878     for(i=0; i<200; ++i) {
879         expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
880         expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
881     }
882 
883     expect[expectLength++]=HANGUL_KIYEOK;
884     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
885 
886     /* try destination overflow first */
887     errorCode=U_ZERO_ERROR;
888     preflightLength=unorm_normalize(input, inLength,
889                            UNORM_NFKC, 0,
890                            output, 100, /* too short */
891                            &errorCode);
892     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
893         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
894     }
895 
896     /* real NFKC */
897     errorCode=U_ZERO_ERROR;
898     length=unorm_normalize(input, inLength,
899                            UNORM_NFKC, 0,
900                            output, sizeof(output)/U_SIZEOF_UCHAR,
901                            &errorCode);
902     if(U_FAILURE(errorCode)) {
903         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
904     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
905         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
906         for(i=0; i<length; ++i) {
907             if(output[i]!=expect[i]) {
908                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
909                 break;
910             }
911         }
912     }
913     if(length!=preflightLength) {
914         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
915     }
916 
917     /* FCD */
918     u_memcpy(expect, input, hangulPrefixLength);
919     expectLength=hangulPrefixLength;
920 
921     expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
922     expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
923     expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
924     expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
925     for(i=0; i<200; ++i) {
926         expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
927         expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
928     }
929     for(i=0; i<200; ++i) {
930         expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
931         expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
932     }
933 
934     expect[expectLength++]=HANGUL_K_KIYEOK;
935     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
936 
937     errorCode=U_ZERO_ERROR;
938     length=unorm_normalize(input, inLength,
939                            UNORM_FCD, 0,
940                            output, sizeof(output)/U_SIZEOF_UCHAR,
941                            &errorCode);
942     if(U_FAILURE(errorCode)) {
943         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
944     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
945         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
946         for(i=0; i<length; ++i) {
947             if(output[i]!=expect[i]) {
948                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
949                 break;
950             }
951         }
952     }
953 }
954 
955 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
956 static void
TestConcatenate(void)957 TestConcatenate(void) {
958     /* "re + 'sume'" */
959     static const UChar
960     left[]={
961         0x72, 0x65, 0
962     },
963     right[]={
964         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
965     },
966     expect[]={
967         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
968     };
969 
970     UChar buffer[100];
971     UErrorCode errorCode;
972     int32_t length;
973 
974     /* left with length, right NUL-terminated */
975     errorCode=U_ZERO_ERROR;
976     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
977     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
978         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
979     }
980 
981     /* preflighting */
982     errorCode=U_ZERO_ERROR;
983     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
984     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
985         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
986     }
987 
988     buffer[2]=0x5555;
989     errorCode=U_ZERO_ERROR;
990     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
991     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
992         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
993     }
994 
995     /* enter with U_FAILURE */
996     buffer[2]=0xaaaa;
997     errorCode=U_UNEXPECTED_TOKEN;
998     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
999     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1000         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1001     }
1002 
1003     /* illegal arguments */
1004     buffer[2]=0xaaaa;
1005     errorCode=U_ZERO_ERROR;
1006     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1007     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1008         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1009     }
1010 
1011     errorCode=U_ZERO_ERROR;
1012     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1013     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1014         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1015     }
1016 }
1017 
/* separator between expected output pieces in the iteration test data */
enum {
    _PLUS = 0x2B
};
1021 
1022 static const char *const _modeString[UNORM_MODE_COUNT]={
1023     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1024 };
1025 
1026 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1027 _testIter(const UChar *src, int32_t srcLength,
1028           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1029           const UChar *out, int32_t outLength,
1030           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1031     UChar buffer[4];
1032     const UChar *expect, *outLimit, *in;
1033     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1034     UErrorCode errorCode;
1035     UBool neededToNormalize, expectNeeded;
1036 
1037     errorCode=U_ZERO_ERROR;
1038     outLimit=out+outLength;
1039     if(forward) {
1040         expect=out;
1041         i=index=0;
1042     } else {
1043         expect=outLimit;
1044         i=srcIndexesLength-2;
1045         index=srcLength;
1046     }
1047 
1048     for(;;) {
1049         prevIndex=index;
1050         if(forward) {
1051             if(!iter->hasNext(iter)) {
1052                 return;
1053             }
1054             length=unorm_next(iter,
1055                               buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1056                               mode, 0,
1057                               (UBool)(out!=NULL), &neededToNormalize,
1058                               &errorCode);
1059             expectIndex=srcIndexes[i+1];
1060             in=src+prevIndex;
1061             inLength=expectIndex-prevIndex;
1062 
1063             if(out!=NULL) {
1064                 /* get output piece from between plus signs */
1065                 expectLength=0;
1066                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1067                     ++expectLength;
1068                 }
1069                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1070             } else {
1071                 expect=in;
1072                 expectLength=inLength;
1073                 expectNeeded=FALSE;
1074             }
1075         } else {
1076             if(!iter->hasPrevious(iter)) {
1077                 return;
1078             }
1079             length=unorm_previous(iter,
1080                                   buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1081                                   mode, 0,
1082                                   (UBool)(out!=NULL), &neededToNormalize,
1083                                   &errorCode);
1084             expectIndex=srcIndexes[i];
1085             in=src+expectIndex;
1086             inLength=prevIndex-expectIndex;
1087 
1088             if(out!=NULL) {
1089                 /* get output piece from between plus signs */
1090                 expectLength=0;
1091                 while(expect!=out && expect[-1]!=_PLUS) {
1092                     ++expectLength;
1093                     --expect;
1094                 }
1095                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1096             } else {
1097                 expect=in;
1098                 expectLength=inLength;
1099                 expectNeeded=FALSE;
1100             }
1101         }
1102         index=iter->getIndex(iter, UITER_CURRENT);
1103 
1104         if(U_FAILURE(errorCode)) {
1105             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1106                     forward, _modeString[mode], i, u_errorName(errorCode));
1107             return;
1108         }
1109         if(expectIndex!=index) {
1110             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1111                     forward, _modeString[mode], i, index, expectIndex);
1112             return;
1113         }
1114         if(expectLength!=length) {
1115             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1116                     forward, _modeString[mode], i, length, expectLength);
1117             return;
1118         }
1119         if(0!=u_memcmp(expect, buffer, length)) {
1120             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1121                     forward, _modeString[mode], i);
1122             return;
1123         }
1124         if(neededToNormalize!=expectNeeded) {
1125         }
1126 
1127         if(forward) {
1128             expect+=expectLength+1; /* go after the + */
1129             ++i;
1130         } else {
1131             --expect; /* go before the + */
1132             --i;
1133         }
1134     }
1135 }
1136 
1137 static void
TestNextPrevious()1138 TestNextPrevious() {
1139     static const UChar
1140     src[]={ /* input string */
1141         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1142     },
1143     nfd[]={ /* + separates expected output pieces */
1144         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1145     },
1146     nfkd[]={
1147         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1148     },
1149     nfc[]={
1150         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1151     },
1152     nfkc[]={
1153         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1154     },
1155     fcd[]={
1156         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1157     };
1158 
1159     /* expected iterator indexes in the source string for each iteration piece */
1160     static const int32_t
1161     nfdIndexes[]={
1162         0, 1, 2, 5, 6, 7
1163     },
1164     nfkdIndexes[]={
1165         0, 1, 2, 5, 6, 7
1166     },
1167     nfcIndexes[]={
1168         0, 1, 2, 5, 6, 7
1169     },
1170     nfkcIndexes[]={
1171         0, 1, 2, 5, 7
1172     },
1173     fcdIndexes[]={
1174         0, 1, 2, 5, 6, 7
1175     };
1176 
1177     UCharIterator iter;
1178 
1179     UChar buffer[4];
1180     int32_t length;
1181 
1182     UBool neededToNormalize;
1183     UErrorCode errorCode;
1184 
1185     uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1186 
1187     /* test iteration with doNormalize */
1188     iter.index=0;
1189     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1190     iter.index=0;
1191     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1192     iter.index=0;
1193     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1194     iter.index=0;
1195     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1196     iter.index=0;
1197     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1198 
1199     iter.index=iter.length;
1200     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1201     iter.index=iter.length;
1202     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1203     iter.index=iter.length;
1204     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1205     iter.index=iter.length;
1206     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1207     iter.index=iter.length;
1208     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1209 
1210     /* test iteration without doNormalize */
1211     iter.index=0;
1212     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1213     iter.index=0;
1214     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1215     iter.index=0;
1216     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1217     iter.index=0;
1218     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1219     iter.index=0;
1220     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1221 
1222     iter.index=iter.length;
1223     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1224     iter.index=iter.length;
1225     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1226     iter.index=iter.length;
1227     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1228     iter.index=iter.length;
1229     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1230     iter.index=iter.length;
1231     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1232 
1233     /* try without neededToNormalize */
1234     errorCode=U_ZERO_ERROR;
1235     buffer[0]=5;
1236     iter.index=1;
1237     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1238                       UNORM_NFD, 0, TRUE, NULL,
1239                       &errorCode);
1240     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1241         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1242         return;
1243     }
1244 
1245     /* preflight */
1246     neededToNormalize=9;
1247     iter.index=1;
1248     length=unorm_next(&iter, NULL, 0,
1249                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1250                       &errorCode);
1251     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1252         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1253         return;
1254     }
1255 
1256     errorCode=U_ZERO_ERROR;
1257     buffer[0]=buffer[1]=5;
1258     neededToNormalize=9;
1259     iter.index=1;
1260     length=unorm_next(&iter, buffer, 1,
1261                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1262                       &errorCode);
1263     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1264         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1265         return;
1266     }
1267 
1268     /* no iterator */
1269     errorCode=U_ZERO_ERROR;
1270     buffer[0]=buffer[1]=5;
1271     neededToNormalize=9;
1272     iter.index=1;
1273     length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1274                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1275                       &errorCode);
1276     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1277         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1278         return;
1279     }
1280 
1281     /* illegal mode */
1282     buffer[0]=buffer[1]=5;
1283     neededToNormalize=9;
1284     iter.index=1;
1285     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1286                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1287                       &errorCode);
1288     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1289         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1290         return;
1291     }
1292 
1293     /* error coming in */
1294     errorCode=U_MISPLACED_QUANTIFIER;
1295     buffer[0]=5;
1296     iter.index=1;
1297     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1298                       UNORM_NFD, 0, TRUE, NULL,
1299                       &errorCode);
1300     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1301         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1302         return;
1303     }
1304 }
1305 
1306 static void
TestFCNFKCClosure(void)1307 TestFCNFKCClosure(void) {
1308     static const struct {
1309         UChar32 c;
1310         const UChar s[6];
1311     } tests[]={
1312         { 0x00C4, { 0 } },
1313         { 0x00E4, { 0 } },
1314         { 0x037A, { 0x0020, 0x03B9, 0 } },
1315         { 0x03D2, { 0x03C5, 0 } },
1316         { 0x20A8, { 0x0072, 0x0073, 0 } },
1317         { 0x210B, { 0x0068, 0 } },
1318         { 0x210C, { 0x0068, 0 } },
1319         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1320         { 0x2122, { 0x0074, 0x006D, 0 } },
1321         { 0x2128, { 0x007A, 0 } },
1322         { 0x1D5DB, { 0x0068, 0 } },
1323         { 0x1D5ED, { 0x007A, 0 } },
1324         { 0x0061, { 0 } }
1325     };
1326 
1327     UChar buffer[8];
1328     UErrorCode errorCode;
1329     int32_t i, length;
1330 
1331     for(i=0; i<LENGTHOF(tests); ++i) {
1332         errorCode=U_ZERO_ERROR;
1333         length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
1334         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1335             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1336         }
1337     }
1338 
1339     /* error handling */
1340     errorCode=U_ZERO_ERROR;
1341     length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
1342     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1343         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1344     }
1345 
1346     length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
1347     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1348         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1349     }
1350 }
1351 
1352 static void
TestQuickCheckPerCP()1353 TestQuickCheckPerCP() {
1354     UErrorCode errorCode;
1355     UChar32 c, lead, trail;
1356     UChar s[U16_MAX_LENGTH], nfd[16];
1357     int32_t length, lccc1, lccc2, tccc1, tccc2;
1358     int32_t qc1, qc2;
1359 
1360     if(
1361         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1362         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1363         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1364         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1365         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1366         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1367     ) {
1368         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1369     }
1370 
1371     /*
1372      * compare the quick check property values for some code points
1373      * to the quick check results for checking same-code point strings
1374      */
1375     errorCode=U_ZERO_ERROR;
1376     c=0;
1377     while(c<0x110000) {
1378         length=0;
1379         U16_APPEND_UNSAFE(s, length, c);
1380 
1381         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1382         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1383         if(qc1!=qc2) {
1384             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1385         }
1386 
1387         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1388         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1389         if(qc1!=qc2) {
1390             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1391         }
1392 
1393         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1394         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1395         if(qc1!=qc2) {
1396             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397         }
1398 
1399         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1400         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1401         if(qc1!=qc2) {
1402             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403         }
1404 
1405         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
1406         /* length-length == 0 is used to get around a compiler warning. */
1407         U16_GET(nfd, 0, length-length, length, lead);
1408         U16_GET(nfd, 0, length-1, length, trail);
1409 
1410         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1411         lccc2=u_getCombiningClass(lead);
1412         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1413         tccc2=u_getCombiningClass(trail);
1414 
1415         if(lccc1!=lccc2) {
1416             log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1417                     lccc1, lccc2, c);
1418         }
1419         if(tccc1!=tccc2) {
1420             log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1421                     tccc1, tccc2, c);
1422         }
1423 
1424         /* skip some code points */
1425         c=(20*c)/19+1;
1426     }
1427 }
1428 
1429 static void
TestComposition(void)1430 TestComposition(void) {
1431     static const struct {
1432         UNormalizationMode mode;
1433         uint32_t options;
1434         UChar input[12];
1435         UChar expect[12];
1436     } cases[]={
1437         /*
1438          * special cases for UAX #15 bug
1439          * see Unicode Corrigendum #5: Normalization Idempotency
1440          * at http://unicode.org/versions/corrigendum5.html
1441          * (was Public Review Issue #29)
1442          */
1443         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1444         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1445         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1446         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1447 
1448         /* TODO: add test cases for UNORM_FCC here (j2151) */
1449     };
1450 
1451     UChar output[16];
1452     UErrorCode errorCode;
1453     int32_t i, length;
1454 
1455     for(i=0; i<LENGTHOF(cases); ++i) {
1456         errorCode=U_ZERO_ERROR;
1457         length=unorm_normalize(
1458                     cases[i].input, -1,
1459                     cases[i].mode, cases[i].options,
1460                     output, LENGTHOF(output),
1461                     &errorCode);
1462         if( U_FAILURE(errorCode) ||
1463             length!=u_strlen(cases[i].expect) ||
1464             0!=u_memcmp(output, cases[i].expect, length)
1465         ) {
1466             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1467         }
1468     }
1469 }
1470 
1471 static void
TestGetDecomposition()1472 TestGetDecomposition() {
1473     UChar decomp[32];
1474     int32_t length;
1475 
1476     UErrorCode errorCode=U_ZERO_ERROR;
1477     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1478     if(U_FAILURE(errorCode)) {
1479         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1480         return;
1481     }
1482 
1483     length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1484     if(U_FAILURE(errorCode) || length>=0) {
1485         log_err("unorm2_getDecomposition(space) failed\n");
1486     }
1487     errorCode=U_ZERO_ERROR;
1488     length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1489     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1490         log_err("unorm2_getDecomposition(a-umlaut) failed\n");
1491     }
1492     errorCode=U_ZERO_ERROR;
1493     length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1494     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1495         log_err("unorm2_getDecomposition(Hangul syllable U+AC01) failed\n");
1496     }
1497     errorCode=U_ZERO_ERROR;
1498     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1499     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1500         log_err("unorm2_getDecomposition(Hangul syllable U+AC01) overflow failed\n");
1501     }
1502     errorCode=U_ZERO_ERROR;
1503     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1504     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1505         log_err("unorm2_getDecomposition(capacity<0) failed\n");
1506     }
1507     errorCode=U_ZERO_ERROR;
1508     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1509     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1510         log_err("unorm2_getDecomposition(decomposition=NULL) failed\n");
1511     }
1512 }
1513 
1514 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1515