• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33 
34 static void
35 TestAPI(void);
36 
37 static void
38 TestNormCoverage(void);
39 
40 static void
41 TestConcatenate(void);
42 
43 static void
44 TestNextPrevious(void);
45 
46 static void TestIsNormalized(void);
47 
48 static void
49 TestFCNFKCClosure(void);
50 
51 static void
52 TestQuickCheckPerCP(void);
53 
54 static void
55 TestComposition(void);
56 
57 static void
58 TestFCD(void);
59 
60 static void
61 TestGetDecomposition(void);
62 
63 static void
64 TestGetRawDecomposition(void);
65 
66 static void TestAppendRestoreMiddle(void);
67 static void TestGetEasyToUseInstance(void);
68 
/*
 * Canonical normalization test data.
 * Each row is { input, expected decomposed form, expected composed form }.
 * TestNormCases() compares against column 1 for decomposing modes and
 * column 2 for composing modes. All strings go through CharsToUChars()
 * before use, so code points are written as escaped backslash-u sequences.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII and a precomposed Latin letter */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D with dot above, precomposed and as a sequence */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },   /* D-dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },  /* D + dot_above */

    /* D with dot below and dot above in various input orders */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_below + dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D + dot_above + dot_below */

    /* three marks on D */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },  /* D-cedilla + dot_above + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D + dot_above + ogonek + dot_below */

    /* E with macron and grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },                /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },         /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },  /* E-grave + macron */

    /* angstrom sign versus A with ring */
    { "\\u212b", "A\\u030a", "\\u00c5" },   /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" },   /* A-ring */

    /* the ffi ligature U+FB03 is left alone by canonical normalization */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* the Roman numeral U+2163 is left alone by canonical normalization */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* Katakana, full-width and half-width */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },                 /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },          /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },   /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },   /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },   /* hw_ka + ten */

    /* marks get reordered: A + grave (U+0300) + grave_below (U+0316) */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    /* empty string */
    { "", "", "" }
};
105 
/*
 * Compatibility normalization test data.
 * Each row is { input, expected decomposed form, expected composed form };
 * the layout matches canonTests and the rows are consumed by TestNormCases().
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed ligature versus Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    /* ffi ligature -> f + f + i */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    /* Roman numeral U+2163 -> "IV" */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* Katakana */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },         /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },  /* ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },  /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },  /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },  /* ka + hw_ten */

    /* empty string */
    { "", "", "" }
};
128 
/*
 * FCD test data: { input, expected result, unused }.
 * TestNormCases() only reads columns 0 and 1 for UNORM_FCD, so the third
 * column is left NULL.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134 
135 void addNormTest(TestNode** root);
136 
addNormTest(TestNode ** root)137 void addNormTest(TestNode** root)
138 {
139     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 }
160 
/*
 * Printable names for log messages; TestNormCases() indexes this table
 * directly with a UNormalizationMode value.
 */
static const char* const modeStrings[]={
    "?",                /* index 0 */
    "UNORM_NONE",       /* index 1 */
    "UNORM_NFD",        /* index 2 */
    "UNORM_NFKD",       /* index 3 */
    "UNORM_NFC",        /* index 4 */
    "UNORM_NFKC",       /* index 5 */
    "UNORM_FCD",        /* index 6 */
    "UNORM_MODE_COUNT"  /* index 7 */
};
171 
/*
 * Runs unorm_normalize() in the given mode over every row of cases[].
 *
 * For each row the input (column 0) is normalized four times: two
 * preflighting calls (explicit length and NUL-terminated) whose lengths
 * must agree, and two real calls (explicit length and NUL-terminated)
 * whose output is compared with the expected column.
 *
 * mode           normalization mode under test
 * cases          rows of { input, decomposed, composed }
 * lengthOfCases  number of rows in cases[]
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    /* composing modes compare against column 2, all others against column 1 */
    const int32_t expectedColumn = (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar normalized[16];
    int32_t caseIndex;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(caseIndex=0; caseIndex<lengthOfCases; ++caseIndex) {
        UErrorCode status = U_ZERO_ERROR;
        UErrorCode nulStatus = U_ZERO_ERROR;
        UChar *source = CharsToUChars(cases[caseIndex][0]);
        int32_t preflightLength, otherLength;

        /* preflight with an explicit length and with a NUL-terminated source */
        preflightLength = unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        otherLength = unorm_normalize(source, -1, mode, 0, NULL, 0, &nulStatus);
        if(preflightLength != otherLength) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)caseIndex, (int)preflightLength, (int)otherLength);
        }

        /* an overflow is the expected outcome of pure preflighting */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status = U_ZERO_ERROR;
        }

        /* real normalization, explicit source length */
        otherLength = unorm_normalize(source, u_strlen(source), mode, 0,
                                      normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && preflightLength == otherLength) {
            assertEqual(normalized, cases[caseIndex][expectedColumn], caseIndex);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* real normalization, NUL-terminated source */
        otherLength = unorm_normalize(source, -1, mode, 0,
                                      normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && preflightLength == otherLength) {
            assertEqual(normalized, cases[caseIndex][expectedColumn], caseIndex);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
211 
TestDecomp()212 void TestDecomp() {
213     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
214 }
215 
TestCompatDecomp()216 void TestCompatDecomp() {
217     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
218 }
219 
TestCanonDecompCompose()220 void TestCanonDecompCompose() {
221     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
222 }
223 
TestCompatDecompCompose()224 void TestCompatDecompCompose() {
225     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
226 }
227 
TestFCD()228 void TestFCD() {
229     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
230 }
231 
/*
 * Logs an error unless result equals the UChar string produced from
 * expected by CharsToUChars().
 *
 * result    NUL-terminated string produced by the code under test
 * expected  expected value in the char form used by the test tables
 * index     row number, used only in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUChars = CharsToUChars(expected);
    const int32_t difference = u_strcmp(result, expectedUChars);

    if(difference != 0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
            austrdup(result) );
    }
    free(expectedUChars);
}
241 
/*
 * Normalizes src[0..srcLen) in the given mode and compares the output with
 * exp[0..expLen). The inputs used by TestNull() contain an embedded U+0000,
 * so both sides are handled by explicit length.
 *
 * src, srcLen  input string and its length
 * exp, expLen  expected output and its length
 * mode         normalization mode to test
 * name         mode name, used only in log messages
 *
 * The function returns as soon as the call fails or the output length is
 * wrong. Comparing contents in those cases would index exp[] (and possibly
 * result[]) with a length that neither array is known to have, and would
 * end with a misleading "OK" message.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar   result[50];
    int32_t len, i;

    /* pre-fill the buffer with U+FFFD before the call */
    for(i=0; i<UPRV_LENGTHOF(result); i++) {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        return;
    }
    if(len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, (int)expLen, (int)len);
        return;
    }

    /* here len==expLen and the call succeeded, so both arrays hold len units */
    for(i=0; i<len; i++) {
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    (int)i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose("     %d: \\u%04X\n", (int)i, result[i]);
    }

    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
}
284 
TestNull()285 void TestNull()
286 {
287 
288     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289     int32_t source_comp_len = 4;
290     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
291     int32_t expect_comp_len = 3;
292 
293     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
294     int32_t source_dcmp_len = 3;
295     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296     int32_t expect_dcmp_len = 5;
297 
298     TestNull_check(source_comp,
299                    source_comp_len,
300                    expect_comp,
301                    expect_comp_len,
302                    UNORM_NFC,
303                    "UNORM_NFC");
304 
305     TestNull_check(source_dcmp,
306                    source_dcmp_len,
307                    expect_dcmp,
308                    expect_dcmp_len,
309                    UNORM_NFD,
310                    "UNORM_NFD");
311 
312     TestNull_check(source_comp,
313                    source_comp_len,
314                    expect_comp,
315                    expect_comp_len,
316                    UNORM_NFKC,
317                    "UNORM_NFKC");
318 
319 
320 }
321 
TestQuickCheckResultNO()322 static void TestQuickCheckResultNO()
323 {
324   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332 
333 
334   const int SIZE = 10;
335 
336   int count = 0;
337   UErrorCode error = U_ZERO_ERROR;
338 
339   for (; count < SIZE; count ++)
340   {
341     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
342                                                               UNORM_NO)
343     {
344       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
345       return;
346     }
347     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
348                                                               UNORM_NO)
349     {
350       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
351       return;
352     }
353     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
354                                                               UNORM_NO)
355     {
356       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
357       return;
358     }
359     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
360                                                               UNORM_NO)
361     {
362       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
363       return;
364     }
365   }
366 }
367 
368 
TestQuickCheckResultYES()369 static void TestQuickCheckResultYES()
370 {
371   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379 
380   const int SIZE = 10;
381   int count = 0;
382   UErrorCode error = U_ZERO_ERROR;
383 
384   UChar cp = 0;
385   while (cp < 0xA0)
386   {
387     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
388     {
389       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
390       return;
391     }
392     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
393                                                              UNORM_YES)
394     {
395       log_err("ERROR in NFC quick check at U+%04x\n", cp);
396       return;
397     }
398     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
399     {
400       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
401       return;
402     }
403     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
404                                                              UNORM_YES)
405     {
406       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
407       return;
408     }
409     cp ++;
410   }
411 
412   for (; count < SIZE; count ++)
413   {
414     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
415                                                              UNORM_YES)
416     {
417       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
418       return;
419     }
420     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
421                                                           != UNORM_YES)
422     {
423       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
424       return;
425     }
426     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
427                                                              UNORM_YES)
428     {
429       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
430       return;
431     }
432     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
433                                                              UNORM_YES)
434     {
435       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
436       return;
437     }
438   }
439 }
440 
TestQuickCheckResultMAYBE()441 static void TestQuickCheckResultMAYBE()
442 {
443   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
447 
448 
449   const int SIZE = 10;
450 
451   int count = 0;
452   UErrorCode error = U_ZERO_ERROR;
453 
454   /* NFD and NFKD does not have any MAYBE codepoints */
455   for (; count < SIZE; count ++)
456   {
457     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
458                                                            UNORM_MAYBE)
459     {
460       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
461       return;
462     }
463     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
464                                                            UNORM_MAYBE)
465     {
466       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
467       return;
468     }
469   }
470 }
471 
TestQuickCheckStringResult()472 static void TestQuickCheckStringResult()
473 {
474   int count;
475   UChar *d = NULL;
476   UChar *c = NULL;
477   UErrorCode error = U_ZERO_ERROR;
478 
479   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
480   {
481     d = CharsToUChars(canonTests[count][1]);
482     c = CharsToUChars(canonTests[count][2]);
483     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
484                                                             UNORM_YES)
485     {
486       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
487       return;
488     }
489 
490     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
491                                                             UNORM_NO)
492     {
493       log_err("ERROR in NFC quick check for string at count %d\n", count);
494       return;
495     }
496 
497     free(d);
498     free(c);
499   }
500 
501   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
502   {
503     d = CharsToUChars(compatTests[count][1]);
504     c = CharsToUChars(compatTests[count][2]);
505     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
506                                                             UNORM_YES)
507     {
508       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
509       return;
510     }
511 
512     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
513                                                             UNORM_YES)
514     {
515       log_err("ERROR in NFKC quick check for string at count %d\n", count);
516       return;
517     }
518 
519     free(d);
520     free(c);
521   }
522 }
523 
/*
 * Entry point for the unorm_quickCheck() tests: runs the per-code-unit
 * NO / YES / MAYBE checks and then the whole-string checks, in that order.
 */
void TestQuickCheck()
{
  /* single code units, grouped by expected answer */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* whole strings from canonTests and compatTests */
  TestQuickCheckStringResult();
}
531 
532 /*
533  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
534  * normalized, and some that are not.
535  * Here we pick some specific cases and test the C API.
536  */
TestIsNormalized(void)537 static void TestIsNormalized(void) {
538     static const UChar notNFC[][8]={            /* strings that are not in NFC */
539         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
540         { 0xfb1d, 0 },                          /* excluded from composition */
541         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
542         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
543     };
544     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
545         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
546         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
547         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
548     };
549 
550     int32_t i;
551     UErrorCode errorCode;
552 
553     /* API test */
554 
555     /* normal case with length>=0 (length -1 used for special cases below) */
556     errorCode=U_ZERO_ERROR;
557     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
558         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
559     }
560 
561     /* incoming U_FAILURE */
562     errorCode=U_TRUNCATED_CHAR_FOUND;
563     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
564     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
565         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
566     }
567 
568     /* NULL source */
569     errorCode=U_ZERO_ERROR;
570     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
571     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
572         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
573     }
574 
575     /* bad length */
576     errorCode=U_ZERO_ERROR;
577     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
578     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
579         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
580     }
581 
582     /* specific cases */
583     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
584         errorCode=U_ZERO_ERROR;
585         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
586             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
587         }
588         errorCode=U_ZERO_ERROR;
589         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
590             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591         }
592     }
593     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
594         errorCode=U_ZERO_ERROR;
595         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
596             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
597         }
598     }
599 }
600 
TestCheckFCD()601 void TestCheckFCD()
602 {
603   UErrorCode status = U_ZERO_ERROR;
604   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
605                          0x0A};
606   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
607                           0x02B9, 0x0314, 0x0315, 0x0316};
608   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
609                          0x0050, 0x0730, 0x09EE, 0x1E10};
610 
611   static const UChar datastr[][5] =
612   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
613     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
614     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
615     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
616   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
617 
618   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
619                             0x6a,
620                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
621                             0xea,
622                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
623                             0x0307, 0x0308, 0x0309, 0x030a,
624                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
625                             0x0327, 0x0328, 0x0329, 0x032a,
626                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
627                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
628 
629   int count = 0;
630 
631   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
632     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
633   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
634     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
635   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
636     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637 
638   if (U_FAILURE(status))
639     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
640 
641   while (count < 4)
642   {
643     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
644     if (U_FAILURE(status)) {
645       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
646       break;
647     }
648     else {
649       if (result[count] != fcdresult) {
650         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
651                  result[count]);
652       }
653     }
654     count ++;
655   }
656 
657   /* random checks of long strings */
658   status = U_ZERO_ERROR;
659   srand((unsigned)time( NULL ));
660 
661   for (count = 0; count < 50; count ++)
662   {
663     int size = 0;
664     UBool testresult = UNORM_YES;
665     UChar data[20];
666     UChar norm[100];
667     UChar nfd[100];
668     int normsize = 0;
669     int nfdsize = 0;
670 
671     while (size != 19) {
672       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
673       log_verbose("0x%x", data[size]);
674       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
675                                   norm + normsize, 100 - normsize, &status);
676       if (U_FAILURE(status)) {
677         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
678         break;
679       }
680       size ++;
681     }
682     log_verbose("\n");
683 
684     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
685                               nfd, 100, &status);
686     if (U_FAILURE(status)) {
687       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
688     }
689 
690     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
691       testresult = UNORM_NO;
692     }
693     if (testresult == UNORM_YES) {
694       log_verbose("result UNORM_YES\n");
695     }
696     else {
697       log_verbose("result UNORM_NO\n");
698     }
699 
700     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
701       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
702     }
703   }
704 }
705 
706 static void
TestAPI()707 TestAPI() {
708     static const UChar in[]={ 0x68, 0xe4 };
709     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
710     UErrorCode errorCode;
711     int32_t length;
712 
713     /* try preflighting */
714     errorCode=U_ZERO_ERROR;
715     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
716     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
717         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
718         return;
719     }
720 
721     errorCode=U_ZERO_ERROR;
722     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
723     if(U_FAILURE(errorCode)) {
724         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
725         return;
726     }
727     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
728         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
729         return;
730     }
731     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
732     if(U_FAILURE(errorCode)) {
733         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
734         return;
735     }
736     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
737     if(U_FAILURE(errorCode)) {
738         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
739         return;
740     }
741 }
742 
743 /* test cases to improve test code coverage */
/* Code points used by the coverage tests below. */
enum {
    /* Hangul compatibility Jamo and the Jamo their NFKD form maps to */
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00=0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
761 
762 static void
TestNormCoverage()763 TestNormCoverage() {
764     UChar input[1000], expect[1000], output[1000];
765     UErrorCode errorCode;
766     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
767 
768     /* create a long and nasty string with NFKC-unsafe characters */
769     inLength=0;
770 
771     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
772     input[inLength++]=HANGUL_KIYEOK;
773     input[inLength++]=HANGUL_WEO;
774     input[inLength++]=HANGUL_KIYEOK_SIOS;
775 
776     input[inLength++]=HANGUL_KIYEOK;
777     input[inLength++]=HANGUL_WEO;
778     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
779 
780     input[inLength++]=HANGUL_KIYEOK;
781     input[inLength++]=HANGUL_K_WEO;
782     input[inLength++]=HANGUL_KIYEOK_SIOS;
783 
784     input[inLength++]=HANGUL_KIYEOK;
785     input[inLength++]=HANGUL_K_WEO;
786     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
787 
788     input[inLength++]=HANGUL_K_KIYEOK;
789     input[inLength++]=HANGUL_WEO;
790     input[inLength++]=HANGUL_KIYEOK_SIOS;
791 
792     input[inLength++]=HANGUL_K_KIYEOK;
793     input[inLength++]=HANGUL_WEO;
794     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
795 
796     input[inLength++]=HANGUL_K_KIYEOK;
797     input[inLength++]=HANGUL_K_WEO;
798     input[inLength++]=HANGUL_KIYEOK_SIOS;
799 
800     input[inLength++]=HANGUL_K_KIYEOK;
801     input[inLength++]=HANGUL_K_WEO;
802     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
803 
804     /* Hangul LV with normal/compatibility Jamo T */
805     input[inLength++]=HANGUL_AC00;
806     input[inLength++]=HANGUL_KIYEOK_SIOS;
807 
808     input[inLength++]=HANGUL_AC00;
809     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
810 
811     /* compatibility Jamo L, V */
812     input[inLength++]=HANGUL_K_KIYEOK;
813     input[inLength++]=HANGUL_K_WEO;
814 
815     hangulPrefixLength=inLength;
816 
817     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
818     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
819     for(i=0; i<200; ++i) {
820         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
821         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
822         input[inLength++]=U16_LEAD(MUSICAL_STEM);
823         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
824     }
825 
826     /* (compatibility) Jamo L, T do not compose */
827     input[inLength++]=HANGUL_K_KIYEOK;
828     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
829 
830     /* quick checks */
831     errorCode=U_ZERO_ERROR;
832     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
833         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
834     }
835     errorCode=U_ZERO_ERROR;
836     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
837         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
838     }
839     errorCode=U_ZERO_ERROR;
840     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
841         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
842     }
843     errorCode=U_ZERO_ERROR;
844     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
845         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
846     }
847     errorCode=U_ZERO_ERROR;
848     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
849         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
850     }
851 
852     /* NFKC */
853     expectLength=0;
854     expect[expectLength++]=HANGUL_SYLLABLE;
855 
856     expect[expectLength++]=HANGUL_SYLLABLE;
857 
858     expect[expectLength++]=HANGUL_SYLLABLE;
859 
860     expect[expectLength++]=HANGUL_SYLLABLE;
861 
862     expect[expectLength++]=HANGUL_SYLLABLE;
863 
864     expect[expectLength++]=HANGUL_SYLLABLE;
865 
866     expect[expectLength++]=HANGUL_SYLLABLE;
867 
868     expect[expectLength++]=HANGUL_SYLLABLE;
869 
870     expect[expectLength++]=HANGUL_AC00+3;
871 
872     expect[expectLength++]=HANGUL_AC00+3;
873 
874     expect[expectLength++]=HANGUL_AC00+14*28;
875 
876     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
877     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
878     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
879     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
880     for(i=0; i<200; ++i) {
881         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
882         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
883     }
884     for(i=0; i<200; ++i) {
885         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
886         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
887     }
888 
889     expect[expectLength++]=HANGUL_KIYEOK;
890     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
891 
892     /* try destination overflow first */
893     errorCode=U_ZERO_ERROR;
894     preflightLength=unorm_normalize(input, inLength,
895                            UNORM_NFKC, 0,
896                            output, 100, /* too short */
897                            &errorCode);
898     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
899         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
900     }
901 
902     /* real NFKC */
903     errorCode=U_ZERO_ERROR;
904     length=unorm_normalize(input, inLength,
905                            UNORM_NFKC, 0,
906                            output, UPRV_LENGTHOF(output),
907                            &errorCode);
908     if(U_FAILURE(errorCode)) {
909         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
910     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
911         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
912         for(i=0; i<length; ++i) {
913             if(output[i]!=expect[i]) {
914                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
915                 break;
916             }
917         }
918     }
919     if(length!=preflightLength) {
920         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
921     }
922 
923     /* FCD */
924     u_memcpy(expect, input, hangulPrefixLength);
925     expectLength=hangulPrefixLength;
926 
927     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
928     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
929     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
930     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
931     for(i=0; i<200; ++i) {
932         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
933         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
934     }
935     for(i=0; i<200; ++i) {
936         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
937         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
938     }
939 
940     expect[expectLength++]=HANGUL_K_KIYEOK;
941     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
942 
943     errorCode=U_ZERO_ERROR;
944     length=unorm_normalize(input, inLength,
945                            UNORM_FCD, 0,
946                            output, UPRV_LENGTHOF(output),
947                            &errorCode);
948     if(U_FAILURE(errorCode)) {
949         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
950     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
951         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
952         for(i=0; i<length; ++i) {
953             if(output[i]!=expect[i]) {
954                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
955                 break;
956             }
957         }
958     }
959 }
960 
961 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
962 static void
TestConcatenate(void)963 TestConcatenate(void) {
964     /* "re + 'sume'" */
965     static const UChar
966     left[]={
967         0x72, 0x65, 0
968     },
969     right[]={
970         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
971     },
972     expect[]={
973         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
974     };
975 
976     UChar buffer[100];
977     UErrorCode errorCode;
978     int32_t length;
979 
980     /* left with length, right NUL-terminated */
981     errorCode=U_ZERO_ERROR;
982     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
983     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
984         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
985     }
986 
987     /* preflighting */
988     errorCode=U_ZERO_ERROR;
989     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
990     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
991         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
992     }
993 
994     buffer[2]=0x5555;
995     errorCode=U_ZERO_ERROR;
996     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
997     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
998         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
999     }
1000 
1001     /* enter with U_FAILURE */
1002     buffer[2]=0xaaaa;
1003     errorCode=U_UNEXPECTED_TOKEN;
1004     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1005     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1006         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1007     }
1008 
1009     /* illegal arguments */
1010     buffer[2]=0xaaaa;
1011     errorCode=U_ZERO_ERROR;
1012     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1013     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1014         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1015     }
1016 
1017     errorCode=U_ZERO_ERROR;
1018     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1019     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1020         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1021     }
1022 }
1023 
/* separator between expected output pieces in the iteration test data */
enum { _PLUS=0x2b };
1027 
1028 static const char *const _modeString[UNORM_MODE_COUNT]={
1029     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1030 };
1031 
1032 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1033 _testIter(const UChar *src, int32_t srcLength,
1034           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1035           const UChar *out, int32_t outLength,
1036           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1037     UChar buffer[4];
1038     const UChar *expect, *outLimit, *in;
1039     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1040     UErrorCode errorCode;
1041     UBool neededToNormalize, expectNeeded;
1042 
1043     errorCode=U_ZERO_ERROR;
1044     outLimit=out+outLength;
1045     if(forward) {
1046         expect=out;
1047         i=index=0;
1048     } else {
1049         expect=outLimit;
1050         i=srcIndexesLength-2;
1051         index=srcLength;
1052     }
1053 
1054     for(;;) {
1055         prevIndex=index;
1056         if(forward) {
1057             if(!iter->hasNext(iter)) {
1058                 return;
1059             }
1060             length=unorm_next(iter,
1061                               buffer, UPRV_LENGTHOF(buffer),
1062                               mode, 0,
1063                               (UBool)(out!=NULL), &neededToNormalize,
1064                               &errorCode);
1065             expectIndex=srcIndexes[i+1];
1066             in=src+prevIndex;
1067             inLength=expectIndex-prevIndex;
1068 
1069             if(out!=NULL) {
1070                 /* get output piece from between plus signs */
1071                 expectLength=0;
1072                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1073                     ++expectLength;
1074                 }
1075                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1076             } else {
1077                 expect=in;
1078                 expectLength=inLength;
1079                 expectNeeded=FALSE;
1080             }
1081         } else {
1082             if(!iter->hasPrevious(iter)) {
1083                 return;
1084             }
1085             length=unorm_previous(iter,
1086                                   buffer, UPRV_LENGTHOF(buffer),
1087                                   mode, 0,
1088                                   (UBool)(out!=NULL), &neededToNormalize,
1089                                   &errorCode);
1090             expectIndex=srcIndexes[i];
1091             in=src+expectIndex;
1092             inLength=prevIndex-expectIndex;
1093 
1094             if(out!=NULL) {
1095                 /* get output piece from between plus signs */
1096                 expectLength=0;
1097                 while(expect!=out && expect[-1]!=_PLUS) {
1098                     ++expectLength;
1099                     --expect;
1100                 }
1101                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1102             } else {
1103                 expect=in;
1104                 expectLength=inLength;
1105                 expectNeeded=FALSE;
1106             }
1107         }
1108         index=iter->getIndex(iter, UITER_CURRENT);
1109 
1110         if(U_FAILURE(errorCode)) {
1111             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1112                     forward, _modeString[mode], i, u_errorName(errorCode));
1113             return;
1114         }
1115         if(expectIndex!=index) {
1116             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1117                     forward, _modeString[mode], i, index, expectIndex);
1118             return;
1119         }
1120         if(expectLength!=length) {
1121             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1122                     forward, _modeString[mode], i, length, expectLength);
1123             return;
1124         }
1125         if(0!=u_memcmp(expect, buffer, length)) {
1126             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1127                     forward, _modeString[mode], i);
1128             return;
1129         }
1130         if(neededToNormalize!=expectNeeded) {
1131         }
1132 
1133         if(forward) {
1134             expect+=expectLength+1; /* go after the + */
1135             ++i;
1136         } else {
1137             --expect; /* go before the + */
1138             --i;
1139         }
1140     }
1141 }
1142 
1143 static void
TestNextPrevious()1144 TestNextPrevious() {
1145     static const UChar
1146     src[]={ /* input string */
1147         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1148     },
1149     nfd[]={ /* + separates expected output pieces */
1150         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1151     },
1152     nfkd[]={
1153         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1154     },
1155     nfc[]={
1156         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1157     },
1158     nfkc[]={
1159         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1160     },
1161     fcd[]={
1162         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1163     };
1164 
1165     /* expected iterator indexes in the source string for each iteration piece */
1166     static const int32_t
1167     nfdIndexes[]={
1168         0, 1, 2, 5, 6, 7
1169     },
1170     nfkdIndexes[]={
1171         0, 1, 2, 5, 6, 7
1172     },
1173     nfcIndexes[]={
1174         0, 1, 2, 5, 6, 7
1175     },
1176     nfkcIndexes[]={
1177         0, 1, 2, 5, 7
1178     },
1179     fcdIndexes[]={
1180         0, 1, 2, 5, 6, 7
1181     };
1182 
1183     UCharIterator iter;
1184 
1185     UChar buffer[4];
1186     int32_t length;
1187 
1188     UBool neededToNormalize;
1189     UErrorCode errorCode;
1190 
1191     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1192 
1193     /* test iteration with doNormalize */
1194     iter.index=0;
1195     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1196     iter.index=0;
1197     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1198     iter.index=0;
1199     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1200     iter.index=0;
1201     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1202     iter.index=0;
1203     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1204 
1205     iter.index=iter.length;
1206     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1207     iter.index=iter.length;
1208     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1209     iter.index=iter.length;
1210     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1211     iter.index=iter.length;
1212     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1213     iter.index=iter.length;
1214     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1215 
1216     /* test iteration without doNormalize */
1217     iter.index=0;
1218     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1219     iter.index=0;
1220     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1221     iter.index=0;
1222     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1223     iter.index=0;
1224     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1225     iter.index=0;
1226     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1227 
1228     iter.index=iter.length;
1229     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1230     iter.index=iter.length;
1231     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1232     iter.index=iter.length;
1233     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1234     iter.index=iter.length;
1235     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1236     iter.index=iter.length;
1237     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1238 
1239     /* try without neededToNormalize */
1240     errorCode=U_ZERO_ERROR;
1241     buffer[0]=5;
1242     iter.index=1;
1243     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1244                       UNORM_NFD, 0, TRUE, NULL,
1245                       &errorCode);
1246     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1247         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1248         return;
1249     }
1250 
1251     /* preflight */
1252     neededToNormalize=9;
1253     iter.index=1;
1254     length=unorm_next(&iter, NULL, 0,
1255                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1256                       &errorCode);
1257     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1258         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1259         return;
1260     }
1261 
1262     errorCode=U_ZERO_ERROR;
1263     buffer[0]=buffer[1]=5;
1264     neededToNormalize=9;
1265     iter.index=1;
1266     length=unorm_next(&iter, buffer, 1,
1267                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1268                       &errorCode);
1269     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1270         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1271         return;
1272     }
1273 
1274     /* no iterator */
1275     errorCode=U_ZERO_ERROR;
1276     buffer[0]=buffer[1]=5;
1277     neededToNormalize=9;
1278     iter.index=1;
1279     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1280                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1281                       &errorCode);
1282     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1283         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1284         return;
1285     }
1286 
1287     /* illegal mode */
1288     buffer[0]=buffer[1]=5;
1289     neededToNormalize=9;
1290     iter.index=1;
1291     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1292                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1293                       &errorCode);
1294     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1295         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1296         return;
1297     }
1298 
1299     /* error coming in */
1300     errorCode=U_MISPLACED_QUANTIFIER;
1301     buffer[0]=5;
1302     iter.index=1;
1303     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1304                       UNORM_NFD, 0, TRUE, NULL,
1305                       &errorCode);
1306     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1307         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1308         return;
1309     }
1310 }
1311 
1312 static void
TestFCNFKCClosure(void)1313 TestFCNFKCClosure(void) {
1314     static const struct {
1315         UChar32 c;
1316         const UChar s[6];
1317     } tests[]={
1318         { 0x00C4, { 0 } },
1319         { 0x00E4, { 0 } },
1320         { 0x037A, { 0x0020, 0x03B9, 0 } },
1321         { 0x03D2, { 0x03C5, 0 } },
1322         { 0x20A8, { 0x0072, 0x0073, 0 } },
1323         { 0x210B, { 0x0068, 0 } },
1324         { 0x210C, { 0x0068, 0 } },
1325         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1326         { 0x2122, { 0x0074, 0x006D, 0 } },
1327         { 0x2128, { 0x007A, 0 } },
1328         { 0x1D5DB, { 0x0068, 0 } },
1329         { 0x1D5ED, { 0x007A, 0 } },
1330         { 0x0061, { 0 } }
1331     };
1332 
1333     UChar buffer[8];
1334     UErrorCode errorCode;
1335     int32_t i, length;
1336 
1337     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1338         errorCode=U_ZERO_ERROR;
1339         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1340         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1341             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1342         }
1343     }
1344 
1345     /* error handling */
1346     errorCode=U_ZERO_ERROR;
1347     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1348     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1349         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1350     }
1351 
1352     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1353     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1354         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1355     }
1356 }
1357 
1358 static void
TestQuickCheckPerCP()1359 TestQuickCheckPerCP() {
1360     UErrorCode errorCode;
1361     UChar32 c, lead, trail;
1362     UChar s[U16_MAX_LENGTH], nfd[16];
1363     int32_t length, lccc1, lccc2, tccc1, tccc2;
1364     int32_t qc1, qc2;
1365 
1366     if(
1367         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1369         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1371         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1372         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1373     ) {
1374         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1375     }
1376 
1377     /*
1378      * compare the quick check property values for some code points
1379      * to the quick check results for checking same-code point strings
1380      */
1381     errorCode=U_ZERO_ERROR;
1382     c=0;
1383     while(c<0x110000) {
1384         length=0;
1385         U16_APPEND_UNSAFE(s, length, c);
1386 
1387         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1388         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1389         if(qc1!=qc2) {
1390             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1391         }
1392 
1393         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1394         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1395         if(qc1!=qc2) {
1396             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397         }
1398 
1399         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1400         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1401         if(qc1!=qc2) {
1402             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403         }
1404 
1405         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1406         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1407         if(qc1!=qc2) {
1408             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1409         }
1410 
1411         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1412         /* length-length == 0 is used to get around a compiler warning. */
1413         U16_GET(nfd, 0, length-length, length, lead);
1414         U16_GET(nfd, 0, length-1, length, trail);
1415 
1416         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1417         lccc2=u_getCombiningClass(lead);
1418         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1419         tccc2=u_getCombiningClass(trail);
1420 
1421         if(lccc1!=lccc2) {
1422             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1423                     lccc1, lccc2, c);
1424         }
1425         if(tccc1!=tccc2) {
1426             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1427                     tccc1, tccc2, c);
1428         }
1429 
1430         /* skip some code points */
1431         c=(20*c)/19+1;
1432     }
1433 }
1434 
1435 static void
TestComposition(void)1436 TestComposition(void) {
1437     static const struct {
1438         UNormalizationMode mode;
1439         uint32_t options;
1440         UChar input[12];
1441         UChar expect[12];
1442     } cases[]={
1443         /*
1444          * special cases for UAX #15 bug
1445          * see Unicode Corrigendum #5: Normalization Idempotency
1446          * at http://unicode.org/versions/corrigendum5.html
1447          * (was Public Review Issue #29)
1448          */
1449         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1450         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1451         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1452         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1453 
1454         /* TODO: add test cases for UNORM_FCC here (j2151) */
1455     };
1456 
1457     UChar output[16];
1458     UErrorCode errorCode;
1459     int32_t i, length;
1460 
1461     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1462         errorCode=U_ZERO_ERROR;
1463         length=unorm_normalize(
1464                     cases[i].input, -1,
1465                     cases[i].mode, cases[i].options,
1466                     output, UPRV_LENGTHOF(output),
1467                     &errorCode);
1468         if( U_FAILURE(errorCode) ||
1469             length!=u_strlen(cases[i].expect) ||
1470             0!=u_memcmp(output, cases[i].expect, length)
1471         ) {
1472             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1473         }
1474     }
1475 }
1476 
1477 static void
TestGetDecomposition()1478 TestGetDecomposition() {
1479     UChar decomp[32];
1480     int32_t length;
1481 
1482     UErrorCode errorCode=U_ZERO_ERROR;
1483     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1484     if(U_FAILURE(errorCode)) {
1485         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1486         return;
1487     }
1488 
1489     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1490     if(U_FAILURE(errorCode) || length>=0) {
1491         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1492     }
1493     errorCode=U_ZERO_ERROR;
1494     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1495     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1496         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1497     }
1498     errorCode=U_ZERO_ERROR;
1499     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1500     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1501         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1502     }
1503     errorCode=U_ZERO_ERROR;
1504     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1505     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1506         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1507     }
1508     errorCode=U_ZERO_ERROR;
1509     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1510     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1511         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1512     }
1513     errorCode=U_ZERO_ERROR;
1514     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1515     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1516         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1517     }
1518 }
1519 
1520 static void
TestGetRawDecomposition()1521 TestGetRawDecomposition() {
1522     UChar decomp[32];
1523     int32_t length;
1524 
1525     UErrorCode errorCode=U_ZERO_ERROR;
1526     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1527     if(U_FAILURE(errorCode)) {
1528         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1529         return;
1530     }
1531     /*
1532      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1533      * without recursive decomposition.
1534      */
1535 
1536     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1537     if(U_FAILURE(errorCode) || length>=0) {
1538         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1539     }
1540     errorCode=U_ZERO_ERROR;
1541     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1542     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1543         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1544     }
1545     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1546     errorCode=U_ZERO_ERROR;
1547     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1548     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1549         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1550     }
1551     /* U+212B ANGSTROM SIGN */
1552     errorCode=U_ZERO_ERROR;
1553     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1554     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1555         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1556     }
1557     errorCode=U_ZERO_ERROR;
1558     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1559     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1560         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1561     }
1562     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1563     errorCode=U_ZERO_ERROR;
1564     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1565     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1566         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1567     }
1568     errorCode=U_ZERO_ERROR;
1569     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1570     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1571         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1572     }
1573     errorCode=U_ZERO_ERROR;
1574     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1575     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1576         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1577     }
1578     errorCode=U_ZERO_ERROR;
1579     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1580     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1581         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1582     }
1583 }
1584 
1585 static void
TestAppendRestoreMiddle()1586 TestAppendRestoreMiddle() {
1587     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1588     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1589     /* NFC: C5 is 'A with ring above' */
1590     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1591     int32_t length;
1592     UErrorCode errorCode=U_ZERO_ERROR;
1593     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1594     if(U_FAILURE(errorCode)) {
1595         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1596         return;
1597     }
1598     /*
1599      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1600      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1601      * still fits into a[] but the full result still overflows this capacity.
1602      * (Let it modify the destination buffer before reallocating internally.)
1603      */
1604     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1605     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1606         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1607         return;
1608     }
1609     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1610     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1611         log_err("unorm2_append(overflow) modified the first string\n");
1612         return;
1613     }
1614     errorCode=U_ZERO_ERROR;
1615     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1616     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1617         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1618         return;
1619     }
1620 }
1621 
1622 static void
TestGetEasyToUseInstance()1623 TestGetEasyToUseInstance() {
1624     static const UChar in[]={
1625         0xA0,  /* -> <noBreak> 0020 */
1626         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1627     };
1628     UChar out[32];
1629     int32_t length;
1630 
1631     UErrorCode errorCode=U_ZERO_ERROR;
1632     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1633     if(U_FAILURE(errorCode)) {
1634         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1635         return;
1636     }
1637     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1638     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1639         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1640                 (int)length, u_errorName(errorCode));
1641     }
1642 
1643     errorCode=U_ZERO_ERROR;
1644     n2=unorm2_getNFDInstance(&errorCode);
1645     if(U_FAILURE(errorCode)) {
1646         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1647         return;
1648     }
1649     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1650     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1651         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1652                 (int)length, u_errorName(errorCode));
1653     }
1654 
1655     errorCode=U_ZERO_ERROR;
1656     n2=unorm2_getNFKCInstance(&errorCode);
1657     if(U_FAILURE(errorCode)) {
1658         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1659         return;
1660     }
1661     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1662     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1663         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1664                 (int)length, u_errorName(errorCode));
1665     }
1666 
1667     errorCode=U_ZERO_ERROR;
1668     n2=unorm2_getNFKDInstance(&errorCode);
1669     if(U_FAILURE(errorCode)) {
1670         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1671         return;
1672     }
1673     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1674     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1675         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1676                 (int)length, u_errorName(errorCode));
1677     }
1678 
1679     errorCode=U_ZERO_ERROR;
1680     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1681     if(U_FAILURE(errorCode)) {
1682         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1683         return;
1684     }
1685     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1686     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1687         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1688                 (int)length, u_errorName(errorCode));
1689     }
1690 }
1691 
1692 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1693