• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33 
/* Prototypes for file-local tests that are referenced before their definitions. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
68 
/*
 * Canonical normalization test data.
 * Column 0 is the input, column 1 the expected decomposed form and
 * column 2 the expected composed form. Strings use \uXXXX escapes that are
 * converted with CharsToUChars() by the tests.
 */
static const char* const canonTests[][3] = {
    /* Input                     Decomposed                   Composed */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    { "\\u1e0a", "D\\u0307", "\\u1e0a" },                                   /* D-dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },                                  /* D dot_above */

    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },              /* D-dot_below dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },              /* D-dot_above dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },             /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below */

    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },                            /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },                     /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },              /* E-grave + macron */

    { "\\u212b", "A\\u030a", "\\u00c5" },                                   /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" },                                   /* A-ring */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },                             /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },                      /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },               /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },               /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },               /* hw_ka + ten */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },             /* A grave grave_below */
    { "", "", "" }
};
105 
/*
 * Compatibility normalization test data.
 * Column 0 is the input, column 1 the expected decomposed form and
 * column 2 the expected composed form.
 */
static const char* const compatTests[][3] = {
    /* Input                 Decomposed               Composed */
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },          /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },       /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },                 /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },          /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },          /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },          /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },          /* ka + hw_ten */
    { "", "", "" }
};
128 
/*
 * FCD test data: column 0 is the input, column 1 the expected result.
 * Column 2 is unused (NULL).
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },  /* D-caron + cedilla */
    { "\\u010e",        "\\u010e",         NULL }   /* D-caron */
};
134 
135 void addNormTest(TestNode** root);
136 
addNormTest(TestNode ** root)137 void addNormTest(TestNode** root)
138 {
139     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 }
160 
/* Printable names for log messages; TestNormCases() indexes this by UNormalizationMode. */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE", "UNORM_NFD", "UNORM_NFKD",
    "UNORM_NFC", "UNORM_NFKC", "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
171 
/*
 * Runs unorm_normalize() in the given mode over every row of cases[][3].
 *
 * For each row the input (column 0) is preflighted twice, once with an
 * explicit length and once as a NUL-terminated string, and the two preflight
 * lengths are compared. The input is then normalized into a local buffer,
 * again with both length conventions, and each result is compared against
 * column 2 for NFC/NFKC or column 1 for every other mode.
 *
 * mode          - normalization mode to test
 * cases         - table of { input, decomposed, composed } escaped strings
 * lengthOfCases - number of rows in cases
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    const int32_t expIndex = (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar result[16];
    int32_t x;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(x=0; x<lengthOfCases; ++x) {
        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
        UChar *source = CharsToUChars(cases[x][0]);
        int32_t neededLen, length2;

        /* Preflight with explicit source length and with NUL termination. */
        neededLen = unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2 = unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen!=length2) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }
        /* An overflow is the expected outcome of preflighting. */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status=U_ZERO_ERROR;
        }

        /* Real normalization, explicit source length. */
        length2 = unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_SUCCESS(status) && neededLen==length2) {
            assertEqual(result, cases[x][expIndex], x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* Real normalization, NUL-terminated source. */
        length2 = unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_SUCCESS(status) && neededLen==length2) {
            assertEqual(result, cases[x][expIndex], x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }
        free(source);
    }
}
211 
TestDecomp()212 void TestDecomp() {
213     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
214 }
215 
TestCompatDecomp()216 void TestCompatDecomp() {
217     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
218 }
219 
TestCanonDecompCompose()220 void TestCanonDecompCompose() {
221     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
222 }
223 
TestCompatDecompCompose()224 void TestCompatDecompCompose() {
225     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
226 }
227 
TestFCD()228 void TestFCD() {
229     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
230 }
231 
/*
 * Compares a NUL-terminated UTF-16 result with an expected string given in
 * escaped char form, and logs an error on mismatch.
 *
 * result   - NUL-terminated string produced by the code under test
 * expected - expected value; converted with CharsToUChars()
 * index    - index of the test case, used in the error message only
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUni = CharsToUChars(expected);
    const int differs = (u_strcmp(result, expectedUni)!=0);

    if(differs) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
            austrdup(result) );
    }
    free(expectedUni);
}
241 
/*
 * Normalizes src[0..srcLen) in the given mode and compares the output,
 * code unit by code unit, with exp[0..expLen).
 *
 * src, srcLen - input text and its length (the callers pass text with an embedded U+0000)
 * exp, expLen - expected output and its length
 * mode        - normalization mode to apply
 * name        - printable mode name used in log messages
 *
 * The function returns right after reporting a normalization failure or a
 * length mismatch. In those cases the returned length is not a valid bound
 * for exp[] (or for the local result buffer), so the element-wise comparison
 * must not run.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len, i;

    UChar   result[50];

    /* Pre-fill the buffer so that untouched output is recognizable. */
    for(i=0;i<UPRV_LENGTHOF(result);i++)
      {
        result[i] = 0xFFFD;
      }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
      return;
    }
    if (len != expLen) {
      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
      return;
    }

    /* Here len==expLen and the output fits into result[]. */
    for(i=0;i<len;i++){
      if(exp[i] != result[i]) {
        log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                name,
                i,
                exp[i],
                result[i]);
        return;
      }
      log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
}
284 
TestNull()285 void TestNull()
286 {
287 
288     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289     int32_t source_comp_len = 4;
290     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
291     int32_t expect_comp_len = 3;
292 
293     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
294     int32_t source_dcmp_len = 3;
295     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296     int32_t expect_dcmp_len = 5;
297 
298     TestNull_check(source_comp,
299                    source_comp_len,
300                    expect_comp,
301                    expect_comp_len,
302                    UNORM_NFC,
303                    "UNORM_NFC");
304 
305     TestNull_check(source_dcmp,
306                    source_dcmp_len,
307                    expect_dcmp,
308                    expect_dcmp_len,
309                    UNORM_NFD,
310                    "UNORM_NFD");
311 
312     TestNull_check(source_comp,
313                    source_comp_len,
314                    expect_comp,
315                    expect_comp_len,
316                    UNORM_NFKC,
317                    "UNORM_NFKC");
318 
319 
320 }
321 
TestQuickCheckResultNO()322 static void TestQuickCheckResultNO()
323 {
324   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332 
333 
334   const int SIZE = 10;
335 
336   int count = 0;
337   UErrorCode error = U_ZERO_ERROR;
338 
339   for (; count < SIZE; count ++)
340   {
341     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
342                                                               UNORM_NO)
343     {
344       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
345       return;
346     }
347     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
348                                                               UNORM_NO)
349     {
350       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
351       return;
352     }
353     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
354                                                               UNORM_NO)
355     {
356       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
357       return;
358     }
359     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
360                                                               UNORM_NO)
361     {
362       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
363       return;
364     }
365   }
366 }
367 
368 
TestQuickCheckResultYES()369 static void TestQuickCheckResultYES()
370 {
371   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379 
380   const int SIZE = 10;
381   int count = 0;
382   UErrorCode error = U_ZERO_ERROR;
383 
384   UChar cp = 0;
385   while (cp < 0xA0)
386   {
387     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
388     {
389       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
390       return;
391     }
392     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
393                                                              UNORM_YES)
394     {
395       log_err("ERROR in NFC quick check at U+%04x\n", cp);
396       return;
397     }
398     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
399     {
400       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
401       return;
402     }
403     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
404                                                              UNORM_YES)
405     {
406       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
407       return;
408     }
409     cp ++;
410   }
411 
412   for (; count < SIZE; count ++)
413   {
414     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
415                                                              UNORM_YES)
416     {
417       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
418       return;
419     }
420     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
421                                                           != UNORM_YES)
422     {
423       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
424       return;
425     }
426     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
427                                                              UNORM_YES)
428     {
429       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
430       return;
431     }
432     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
433                                                              UNORM_YES)
434     {
435       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
436       return;
437     }
438   }
439 }
440 
TestQuickCheckResultMAYBE()441 static void TestQuickCheckResultMAYBE()
442 {
443   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
447 
448 
449   const int SIZE = 10;
450 
451   int count = 0;
452   UErrorCode error = U_ZERO_ERROR;
453 
454   /* NFD and NFKD does not have any MAYBE codepoints */
455   for (; count < SIZE; count ++)
456   {
457     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
458                                                            UNORM_MAYBE)
459     {
460       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
461       return;
462     }
463     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
464                                                            UNORM_MAYBE)
465     {
466       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
467       return;
468     }
469   }
470 }
471 
TestQuickCheckStringResult()472 static void TestQuickCheckStringResult()
473 {
474   int count;
475   UChar *d = NULL;
476   UChar *c = NULL;
477   UErrorCode error = U_ZERO_ERROR;
478 
479   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
480   {
481     d = CharsToUChars(canonTests[count][1]);
482     c = CharsToUChars(canonTests[count][2]);
483     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
484                                                             UNORM_YES)
485     {
486       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
487       free(d); free(c);
488       return;
489     }
490 
491     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
492                                                             UNORM_NO)
493     {
494       log_err("ERROR in NFC quick check for string at count %d\n", count);
495       free(d); free(c);
496       return;
497     }
498 
499     free(d);
500     free(c);
501   }
502 
503   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
504   {
505     d = CharsToUChars(compatTests[count][1]);
506     c = CharsToUChars(compatTests[count][2]);
507     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
508                                                             UNORM_YES)
509     {
510       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
511       free(d); free(c);
512       return;
513     }
514 
515     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
516                                                             UNORM_YES)
517     {
518       log_err("ERROR in NFKC quick check for string at count %d\n", count);
519       free(d); free(c);
520       return;
521     }
522 
523     free(d);
524     free(c);
525   }
526 }
527 
/* Runs the quick-check sub-tests: NO, YES and MAYBE results, then whole strings. */
void TestQuickCheck(void)
{
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  TestQuickCheckStringResult();
}
535 
536 /*
537  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
538  * normalized, and some that are not.
539  * Here we pick some specific cases and test the C API.
540  */
TestIsNormalized(void)541 static void TestIsNormalized(void) {
542     static const UChar notNFC[][8]={            /* strings that are not in NFC */
543         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
544         { 0xfb1d, 0 },                          /* excluded from composition */
545         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
546         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
547     };
548     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
549         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
550         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
551         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
552     };
553 
554     int32_t i;
555     UErrorCode errorCode;
556 
557     /* API test */
558 
559     /* normal case with length>=0 (length -1 used for special cases below) */
560     errorCode=U_ZERO_ERROR;
561     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
562         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
563     }
564 
565     /* incoming U_FAILURE */
566     errorCode=U_TRUNCATED_CHAR_FOUND;
567     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
568     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
569         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
570     }
571 
572     /* NULL source */
573     errorCode=U_ZERO_ERROR;
574     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
575     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
576         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
577     }
578 
579     /* bad length */
580     errorCode=U_ZERO_ERROR;
581     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
582     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
583         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
584     }
585 
586     /* specific cases */
587     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
588         errorCode=U_ZERO_ERROR;
589         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
590             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591         }
592         errorCode=U_ZERO_ERROR;
593         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
594             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
595         }
596     }
597     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
598         errorCode=U_ZERO_ERROR;
599         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
600             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
601         }
602     }
603 }
604 
TestCheckFCD()605 void TestCheckFCD()
606 {
607   UErrorCode status = U_ZERO_ERROR;
608   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
609                          0x0A};
610   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
611                           0x02B9, 0x0314, 0x0315, 0x0316};
612   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
613                          0x0050, 0x0730, 0x09EE, 0x1E10};
614 
615   static const UChar datastr[][5] =
616   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
617     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
618     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
619     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
620   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
621 
622   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
623                             0x6a,
624                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
625                             0xea,
626                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
627                             0x0307, 0x0308, 0x0309, 0x030a,
628                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
629                             0x0327, 0x0328, 0x0329, 0x032a,
630                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
631                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
632 
633   int count = 0;
634 
635   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
636     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
638     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
639   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
640     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
641 
642   if (U_FAILURE(status))
643     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
644 
645   while (count < 4)
646   {
647     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
648     if (U_FAILURE(status)) {
649       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
650       break;
651     }
652     else {
653       if (result[count] != fcdresult) {
654         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
655                  result[count]);
656       }
657     }
658     count ++;
659   }
660 
661   /* random checks of long strings */
662   status = U_ZERO_ERROR;
663   srand((unsigned)time( NULL ));
664 
665   for (count = 0; count < 50; count ++)
666   {
667     int size = 0;
668     UNormalizationCheckResult testresult = UNORM_YES;
669     UChar data[20];
670     UChar norm[100];
671     UChar nfd[100];
672     int normsize = 0;
673     int nfdsize = 0;
674 
675     while (size != 19) {
676       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
677       log_verbose("0x%x", data[size]);
678       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
679                                   norm + normsize, 100 - normsize, &status);
680       if (U_FAILURE(status)) {
681         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
682         break;
683       }
684       size ++;
685     }
686     log_verbose("\n");
687 
688     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
689                               nfd, 100, &status);
690     if (U_FAILURE(status)) {
691       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
692     }
693 
694     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
695       testresult = UNORM_NO;
696     }
697     if (testresult == UNORM_YES) {
698       log_verbose("result UNORM_YES\n");
699     }
700     else {
701       log_verbose("result UNORM_NO\n");
702     }
703 
704     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
705       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
706     }
707   }
708 }
709 
710 static void
TestAPI()711 TestAPI() {
712     static const UChar in[]={ 0x68, 0xe4 };
713     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
714     UErrorCode errorCode;
715     int32_t length;
716 
717     /* try preflighting */
718     errorCode=U_ZERO_ERROR;
719     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
720     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
721         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
722         return;
723     }
724 
725     errorCode=U_ZERO_ERROR;
726     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
727     if(U_FAILURE(errorCode)) {
728         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
729         return;
730     }
731     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
732         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
733         return;
734     }
735     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
736     if(U_FAILURE(errorCode)) {
737         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
738         return;
739     }
740     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
741     if(U_FAILURE(errorCode)) {
742         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
743         return;
744     }
745 }
746 
/* Code point constants for the test cases that improve test code coverage. */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00=0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
765 
766 static void
TestNormCoverage()767 TestNormCoverage() {
768     UChar input[1000], expect[1000], output[1000];
769     UErrorCode errorCode;
770     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
771 
772     /* create a long and nasty string with NFKC-unsafe characters */
773     inLength=0;
774 
775     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
776     input[inLength++]=HANGUL_KIYEOK;
777     input[inLength++]=HANGUL_WEO;
778     input[inLength++]=HANGUL_KIYEOK_SIOS;
779 
780     input[inLength++]=HANGUL_KIYEOK;
781     input[inLength++]=HANGUL_WEO;
782     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
783 
784     input[inLength++]=HANGUL_KIYEOK;
785     input[inLength++]=HANGUL_K_WEO;
786     input[inLength++]=HANGUL_KIYEOK_SIOS;
787 
788     input[inLength++]=HANGUL_KIYEOK;
789     input[inLength++]=HANGUL_K_WEO;
790     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
791 
792     input[inLength++]=HANGUL_K_KIYEOK;
793     input[inLength++]=HANGUL_WEO;
794     input[inLength++]=HANGUL_KIYEOK_SIOS;
795 
796     input[inLength++]=HANGUL_K_KIYEOK;
797     input[inLength++]=HANGUL_WEO;
798     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
799 
800     input[inLength++]=HANGUL_K_KIYEOK;
801     input[inLength++]=HANGUL_K_WEO;
802     input[inLength++]=HANGUL_KIYEOK_SIOS;
803 
804     input[inLength++]=HANGUL_K_KIYEOK;
805     input[inLength++]=HANGUL_K_WEO;
806     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
807 
808     /* Hangul LV with normal/compatibility Jamo T */
809     input[inLength++]=HANGUL_AC00;
810     input[inLength++]=HANGUL_KIYEOK_SIOS;
811 
812     input[inLength++]=HANGUL_AC00;
813     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
814 
815     /* compatibility Jamo L, V */
816     input[inLength++]=HANGUL_K_KIYEOK;
817     input[inLength++]=HANGUL_K_WEO;
818 
819     hangulPrefixLength=inLength;
820 
821     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
822     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
823     for(i=0; i<200; ++i) {
824         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
825         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
826         input[inLength++]=U16_LEAD(MUSICAL_STEM);
827         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
828     }
829 
830     /* (compatibility) Jamo L, T do not compose */
831     input[inLength++]=HANGUL_K_KIYEOK;
832     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
833 
834     /* quick checks */
835     errorCode=U_ZERO_ERROR;
836     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
837         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
838     }
839     errorCode=U_ZERO_ERROR;
840     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
841         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
842     }
843     errorCode=U_ZERO_ERROR;
844     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
845         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
846     }
847     errorCode=U_ZERO_ERROR;
848     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
849         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
850     }
851     errorCode=U_ZERO_ERROR;
852     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
853         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
854     }
855 
856     /* NFKC */
857     expectLength=0;
858     expect[expectLength++]=HANGUL_SYLLABLE;
859 
860     expect[expectLength++]=HANGUL_SYLLABLE;
861 
862     expect[expectLength++]=HANGUL_SYLLABLE;
863 
864     expect[expectLength++]=HANGUL_SYLLABLE;
865 
866     expect[expectLength++]=HANGUL_SYLLABLE;
867 
868     expect[expectLength++]=HANGUL_SYLLABLE;
869 
870     expect[expectLength++]=HANGUL_SYLLABLE;
871 
872     expect[expectLength++]=HANGUL_SYLLABLE;
873 
874     expect[expectLength++]=HANGUL_AC00+3;
875 
876     expect[expectLength++]=HANGUL_AC00+3;
877 
878     expect[expectLength++]=HANGUL_AC00+14*28;
879 
880     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
881     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
882     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
883     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
884     for(i=0; i<200; ++i) {
885         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
886         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
887     }
888     for(i=0; i<200; ++i) {
889         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
890         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
891     }
892 
893     expect[expectLength++]=HANGUL_KIYEOK;
894     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
895 
896     /* try destination overflow first */
897     errorCode=U_ZERO_ERROR;
898     preflightLength=unorm_normalize(input, inLength,
899                            UNORM_NFKC, 0,
900                            output, 100, /* too short */
901                            &errorCode);
902     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
903         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
904     }
905 
906     /* real NFKC */
907     errorCode=U_ZERO_ERROR;
908     length=unorm_normalize(input, inLength,
909                            UNORM_NFKC, 0,
910                            output, UPRV_LENGTHOF(output),
911                            &errorCode);
912     if(U_FAILURE(errorCode)) {
913         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
914     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
915         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
916         for(i=0; i<length; ++i) {
917             if(output[i]!=expect[i]) {
918                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
919                 break;
920             }
921         }
922     }
923     if(length!=preflightLength) {
924         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
925     }
926 
927     /* FCD */
928     u_memcpy(expect, input, hangulPrefixLength);
929     expectLength=hangulPrefixLength;
930 
931     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
932     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
933     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
934     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
935     for(i=0; i<200; ++i) {
936         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
937         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
938     }
939     for(i=0; i<200; ++i) {
940         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
941         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
942     }
943 
944     expect[expectLength++]=HANGUL_K_KIYEOK;
945     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
946 
947     errorCode=U_ZERO_ERROR;
948     length=unorm_normalize(input, inLength,
949                            UNORM_FCD, 0,
950                            output, UPRV_LENGTHOF(output),
951                            &errorCode);
952     if(U_FAILURE(errorCode)) {
953         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
954     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
955         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
956         for(i=0; i<length; ++i) {
957             if(output[i]!=expect[i]) {
958                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
959                 break;
960             }
961         }
962     }
963 }
964 
965 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
966 static void
TestConcatenate(void)967 TestConcatenate(void) {
968     /* "re + 'sume'" */
969     static const UChar
970     left[]={
971         0x72, 0x65, 0
972     },
973     right[]={
974         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
975     },
976     expect[]={
977         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
978     };
979 
980     UChar buffer[100];
981     UErrorCode errorCode;
982     int32_t length;
983 
984     /* left with length, right NUL-terminated */
985     errorCode=U_ZERO_ERROR;
986     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
987     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
988         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
989     }
990 
991     /* preflighting */
992     errorCode=U_ZERO_ERROR;
993     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
994     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
995         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
996     }
997 
998     buffer[2]=0x5555;
999     errorCode=U_ZERO_ERROR;
1000     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1001     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1002         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1003     }
1004 
1005     /* enter with U_FAILURE */
1006     buffer[2]=0xaaaa;
1007     errorCode=U_UNEXPECTED_TOKEN;
1008     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1009     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1010         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1011     }
1012 
1013     /* illegal arguments */
1014     buffer[2]=0xaaaa;
1015     errorCode=U_ZERO_ERROR;
1016     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1017     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1018         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1019     }
1020 
1021     errorCode=U_ZERO_ERROR;
1022     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1023     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1024         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1025     }
1026 }
1027 
/* code unit value 0x2b, used as a marker in UChar test data */
enum { _PLUS = 0x2b };
1031 
1032 static const char *const _modeString[UNORM_MODE_COUNT]={
1033     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1034 };
1035 
1036 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1037 _testIter(const UChar *src, int32_t srcLength,
1038           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1039           const UChar *out, int32_t outLength,
1040           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1041     UChar buffer[4];
1042     const UChar *expect, *outLimit, *in;
1043     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1044     UErrorCode errorCode;
1045     UBool neededToNormalize, expectNeeded;
1046 
1047     errorCode=U_ZERO_ERROR;
1048     outLimit=out+outLength;
1049     if(forward) {
1050         expect=out;
1051         i=index=0;
1052     } else {
1053         expect=outLimit;
1054         i=srcIndexesLength-2;
1055         index=srcLength;
1056     }
1057 
1058     for(;;) {
1059         prevIndex=index;
1060         if(forward) {
1061             if(!iter->hasNext(iter)) {
1062                 return;
1063             }
1064             length=unorm_next(iter,
1065                               buffer, UPRV_LENGTHOF(buffer),
1066                               mode, 0,
1067                               (UBool)(out!=NULL), &neededToNormalize,
1068                               &errorCode);
1069             expectIndex=srcIndexes[i+1];
1070             in=src+prevIndex;
1071             inLength=expectIndex-prevIndex;
1072 
1073             if(out!=NULL) {
1074                 /* get output piece from between plus signs */
1075                 expectLength=0;
1076                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1077                     ++expectLength;
1078                 }
1079                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1080             } else {
1081                 expect=in;
1082                 expectLength=inLength;
1083                 expectNeeded=FALSE;
1084             }
1085         } else {
1086             if(!iter->hasPrevious(iter)) {
1087                 return;
1088             }
1089             length=unorm_previous(iter,
1090                                   buffer, UPRV_LENGTHOF(buffer),
1091                                   mode, 0,
1092                                   (UBool)(out!=NULL), &neededToNormalize,
1093                                   &errorCode);
1094             expectIndex=srcIndexes[i];
1095             in=src+expectIndex;
1096             inLength=prevIndex-expectIndex;
1097 
1098             if(out!=NULL) {
1099                 /* get output piece from between plus signs */
1100                 expectLength=0;
1101                 while(expect!=out && expect[-1]!=_PLUS) {
1102                     ++expectLength;
1103                     --expect;
1104                 }
1105                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1106             } else {
1107                 expect=in;
1108                 expectLength=inLength;
1109                 expectNeeded=FALSE;
1110             }
1111         }
1112         index=iter->getIndex(iter, UITER_CURRENT);
1113 
1114         if(U_FAILURE(errorCode)) {
1115             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1116                     forward, _modeString[mode], i, u_errorName(errorCode));
1117             return;
1118         }
1119         if(expectIndex!=index) {
1120             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1121                     forward, _modeString[mode], i, index, expectIndex);
1122             return;
1123         }
1124         if(expectLength!=length) {
1125             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1126                     forward, _modeString[mode], i, length, expectLength);
1127             return;
1128         }
1129         if(0!=u_memcmp(expect, buffer, length)) {
1130             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1131                     forward, _modeString[mode], i);
1132             return;
1133         }
1134         if(neededToNormalize!=expectNeeded) {
1135         }
1136 
1137         if(forward) {
1138             expect+=expectLength+1; /* go after the + */
1139             ++i;
1140         } else {
1141             --expect; /* go before the + */
1142             --i;
1143         }
1144     }
1145 }
1146 
1147 static void
TestNextPrevious()1148 TestNextPrevious() {
1149     static const UChar
1150     src[]={ /* input string */
1151         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1152     },
1153     nfd[]={ /* + separates expected output pieces */
1154         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1155     },
1156     nfkd[]={
1157         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1158     },
1159     nfc[]={
1160         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1161     },
1162     nfkc[]={
1163         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1164     },
1165     fcd[]={
1166         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1167     };
1168 
1169     /* expected iterator indexes in the source string for each iteration piece */
1170     static const int32_t
1171     nfdIndexes[]={
1172         0, 1, 2, 5, 6, 7
1173     },
1174     nfkdIndexes[]={
1175         0, 1, 2, 5, 6, 7
1176     },
1177     nfcIndexes[]={
1178         0, 1, 2, 5, 6, 7
1179     },
1180     nfkcIndexes[]={
1181         0, 1, 2, 5, 7
1182     },
1183     fcdIndexes[]={
1184         0, 1, 2, 5, 6, 7
1185     };
1186 
1187     UCharIterator iter;
1188 
1189     UChar buffer[4];
1190     int32_t length;
1191 
1192     UBool neededToNormalize;
1193     UErrorCode errorCode;
1194 
1195     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1196 
1197     /* test iteration with doNormalize */
1198     iter.index=0;
1199     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1200     iter.index=0;
1201     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1202     iter.index=0;
1203     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1204     iter.index=0;
1205     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1206     iter.index=0;
1207     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1208 
1209     iter.index=iter.length;
1210     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1211     iter.index=iter.length;
1212     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1213     iter.index=iter.length;
1214     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1215     iter.index=iter.length;
1216     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1217     iter.index=iter.length;
1218     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1219 
1220     /* test iteration without doNormalize */
1221     iter.index=0;
1222     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1223     iter.index=0;
1224     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1225     iter.index=0;
1226     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1227     iter.index=0;
1228     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1229     iter.index=0;
1230     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1231 
1232     iter.index=iter.length;
1233     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1234     iter.index=iter.length;
1235     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1236     iter.index=iter.length;
1237     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1238     iter.index=iter.length;
1239     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1240     iter.index=iter.length;
1241     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1242 
1243     /* try without neededToNormalize */
1244     errorCode=U_ZERO_ERROR;
1245     buffer[0]=5;
1246     iter.index=1;
1247     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1248                       UNORM_NFD, 0, TRUE, NULL,
1249                       &errorCode);
1250     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1251         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1252         return;
1253     }
1254 
1255     /* preflight */
1256     neededToNormalize=9;
1257     iter.index=1;
1258     length=unorm_next(&iter, NULL, 0,
1259                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1260                       &errorCode);
1261     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1262         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1263         return;
1264     }
1265 
1266     errorCode=U_ZERO_ERROR;
1267     buffer[0]=buffer[1]=5;
1268     neededToNormalize=9;
1269     iter.index=1;
1270     length=unorm_next(&iter, buffer, 1,
1271                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1272                       &errorCode);
1273     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1274         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1275         return;
1276     }
1277 
1278     /* no iterator */
1279     errorCode=U_ZERO_ERROR;
1280     buffer[0]=buffer[1]=5;
1281     neededToNormalize=9;
1282     iter.index=1;
1283     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1284                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1285                       &errorCode);
1286     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1287         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1288         return;
1289     }
1290 
1291     /* illegal mode */
1292     buffer[0]=buffer[1]=5;
1293     neededToNormalize=9;
1294     iter.index=1;
1295     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1296                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1297                       &errorCode);
1298     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1299         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1300         return;
1301     }
1302 
1303     /* error coming in */
1304     errorCode=U_MISPLACED_QUANTIFIER;
1305     buffer[0]=5;
1306     iter.index=1;
1307     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1308                       UNORM_NFD, 0, TRUE, NULL,
1309                       &errorCode);
1310     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1311         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1312         return;
1313     }
1314 }
1315 
1316 static void
TestFCNFKCClosure(void)1317 TestFCNFKCClosure(void) {
1318     static const struct {
1319         UChar32 c;
1320         const UChar s[6];
1321     } tests[]={
1322         { 0x00C4, { 0 } },
1323         { 0x00E4, { 0 } },
1324         { 0x037A, { 0x0020, 0x03B9, 0 } },
1325         { 0x03D2, { 0x03C5, 0 } },
1326         { 0x20A8, { 0x0072, 0x0073, 0 } },
1327         { 0x210B, { 0x0068, 0 } },
1328         { 0x210C, { 0x0068, 0 } },
1329         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1330         { 0x2122, { 0x0074, 0x006D, 0 } },
1331         { 0x2128, { 0x007A, 0 } },
1332         { 0x1D5DB, { 0x0068, 0 } },
1333         { 0x1D5ED, { 0x007A, 0 } },
1334         { 0x0061, { 0 } }
1335     };
1336 
1337     UChar buffer[8];
1338     UErrorCode errorCode;
1339     int32_t i, length;
1340 
1341     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1342         errorCode=U_ZERO_ERROR;
1343         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1344         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1345             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1346         }
1347     }
1348 
1349     /* error handling */
1350     errorCode=U_ZERO_ERROR;
1351     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1352     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1353         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1354     }
1355 
1356     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1357     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1358         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1359     }
1360 }
1361 
1362 static void
TestQuickCheckPerCP()1363 TestQuickCheckPerCP() {
1364     UErrorCode errorCode;
1365     UChar32 c, lead, trail;
1366     UChar s[U16_MAX_LENGTH], nfd[16];
1367     int32_t length, lccc1, lccc2, tccc1, tccc2;
1368     int32_t qc1, qc2;
1369 
1370     if(
1371         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1372         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1373         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1374         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1375         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1376         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1377     ) {
1378         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1379     }
1380 
1381     /*
1382      * compare the quick check property values for some code points
1383      * to the quick check results for checking same-code point strings
1384      */
1385     errorCode=U_ZERO_ERROR;
1386     c=0;
1387     while(c<0x110000) {
1388         length=0;
1389         U16_APPEND_UNSAFE(s, length, c);
1390 
1391         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1392         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1393         if(qc1!=qc2) {
1394             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1395         }
1396 
1397         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1398         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1399         if(qc1!=qc2) {
1400             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1401         }
1402 
1403         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1404         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1405         if(qc1!=qc2) {
1406             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1407         }
1408 
1409         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1410         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1411         if(qc1!=qc2) {
1412             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1413         }
1414 
1415         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1416         if (U_FAILURE(errorCode)) {
1417             log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1418             break;
1419         }
1420 
1421         /* length-length == 0 is used to get around a compiler warning. */
1422         U16_GET(nfd, 0, length-length, length, lead);
1423         U16_GET(nfd, 0, length-1, length, trail);
1424 
1425         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1426         lccc2=u_getCombiningClass(lead);
1427         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1428         tccc2=u_getCombiningClass(trail);
1429 
1430         if(lccc1!=lccc2) {
1431             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1432                     lccc1, lccc2, c);
1433         }
1434         if(tccc1!=tccc2) {
1435             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1436                     tccc1, tccc2, c);
1437         }
1438 
1439         /* skip some code points */
1440         c=(20*c)/19+1;
1441     }
1442 }
1443 
1444 static void
TestComposition(void)1445 TestComposition(void) {
1446     static const struct {
1447         UNormalizationMode mode;
1448         uint32_t options;
1449         UChar input[12];
1450         UChar expect[12];
1451     } cases[]={
1452         /*
1453          * special cases for UAX #15 bug
1454          * see Unicode Corrigendum #5: Normalization Idempotency
1455          * at http://unicode.org/versions/corrigendum5.html
1456          * (was Public Review Issue #29)
1457          */
1458         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1459         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1460         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1461         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1462 
1463         /* TODO: add test cases for UNORM_FCC here (j2151) */
1464     };
1465 
1466     UChar output[16];
1467     UErrorCode errorCode;
1468     int32_t i, length;
1469 
1470     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1471         errorCode=U_ZERO_ERROR;
1472         length=unorm_normalize(
1473                     cases[i].input, -1,
1474                     cases[i].mode, cases[i].options,
1475                     output, UPRV_LENGTHOF(output),
1476                     &errorCode);
1477         if( U_FAILURE(errorCode) ||
1478             length!=u_strlen(cases[i].expect) ||
1479             0!=u_memcmp(output, cases[i].expect, length)
1480         ) {
1481             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1482         }
1483     }
1484 }
1485 
1486 static void
TestGetDecomposition()1487 TestGetDecomposition() {
1488     UChar decomp[32];
1489     int32_t length;
1490 
1491     UErrorCode errorCode=U_ZERO_ERROR;
1492     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1493     if(U_FAILURE(errorCode)) {
1494         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1495         return;
1496     }
1497 
1498     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499     if(U_FAILURE(errorCode) || length>=0) {
1500         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1501     }
1502     errorCode=U_ZERO_ERROR;
1503     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1504     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1505         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1506     }
1507     errorCode=U_ZERO_ERROR;
1508     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1509     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1510         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1511     }
1512     errorCode=U_ZERO_ERROR;
1513     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1514     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1515         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1516     }
1517     errorCode=U_ZERO_ERROR;
1518     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1519     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1520         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1521     }
1522     errorCode=U_ZERO_ERROR;
1523     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1524     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1525         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1526     }
1527 }
1528 
1529 static void
TestGetRawDecomposition()1530 TestGetRawDecomposition() {
1531     UChar decomp[32];
1532     int32_t length;
1533 
1534     UErrorCode errorCode=U_ZERO_ERROR;
1535     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1536     if(U_FAILURE(errorCode)) {
1537         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1538         return;
1539     }
1540     /*
1541      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1542      * without recursive decomposition.
1543      */
1544 
1545     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1546     if(U_FAILURE(errorCode) || length>=0) {
1547         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1548     }
1549     errorCode=U_ZERO_ERROR;
1550     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1551     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1552         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1553     }
1554     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1555     errorCode=U_ZERO_ERROR;
1556     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1557     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1558         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1559     }
1560     /* U+212B ANGSTROM SIGN */
1561     errorCode=U_ZERO_ERROR;
1562     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1563     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1564         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1565     }
1566     errorCode=U_ZERO_ERROR;
1567     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1568     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1569         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1570     }
1571     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1572     errorCode=U_ZERO_ERROR;
1573     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1574     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1575         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1576     }
1577     errorCode=U_ZERO_ERROR;
1578     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1579     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1580         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1581     }
1582     errorCode=U_ZERO_ERROR;
1583     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1584     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1585         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1586     }
1587     errorCode=U_ZERO_ERROR;
1588     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1589     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1590         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1591     }
1592 }
1593 
1594 static void
TestAppendRestoreMiddle()1595 TestAppendRestoreMiddle() {
1596     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1597     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1598     /* NFC: C5 is 'A with ring above' */
1599     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1600     int32_t length;
1601     UErrorCode errorCode=U_ZERO_ERROR;
1602     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1603     if(U_FAILURE(errorCode)) {
1604         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1605         return;
1606     }
1607     /*
1608      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1609      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1610      * still fits into a[] but the full result still overflows this capacity.
1611      * (Let it modify the destination buffer before reallocating internally.)
1612      */
1613     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1614     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1615         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1616         return;
1617     }
1618     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1619     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1620         log_err("unorm2_append(overflow) modified the first string\n");
1621         return;
1622     }
1623     errorCode=U_ZERO_ERROR;
1624     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1625     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1626         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1627         return;
1628     }
1629 }
1630 
1631 static void
TestGetEasyToUseInstance()1632 TestGetEasyToUseInstance() {
1633     static const UChar in[]={
1634         0xA0,  /* -> <noBreak> 0020 */
1635         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1636     };
1637     UChar out[32];
1638     int32_t length;
1639 
1640     UErrorCode errorCode=U_ZERO_ERROR;
1641     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1642     if(U_FAILURE(errorCode)) {
1643         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1644         return;
1645     }
1646     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1647     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1648         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1649                 (int)length, u_errorName(errorCode));
1650     }
1651 
1652     errorCode=U_ZERO_ERROR;
1653     n2=unorm2_getNFDInstance(&errorCode);
1654     if(U_FAILURE(errorCode)) {
1655         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1656         return;
1657     }
1658     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1659     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1660         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1661                 (int)length, u_errorName(errorCode));
1662     }
1663 
1664     errorCode=U_ZERO_ERROR;
1665     n2=unorm2_getNFKCInstance(&errorCode);
1666     if(U_FAILURE(errorCode)) {
1667         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1668         return;
1669     }
1670     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1671     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1672         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1673                 (int)length, u_errorName(errorCode));
1674     }
1675 
1676     errorCode=U_ZERO_ERROR;
1677     n2=unorm2_getNFKDInstance(&errorCode);
1678     if(U_FAILURE(errorCode)) {
1679         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1680         return;
1681     }
1682     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1683     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1684         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1685                 (int)length, u_errorName(errorCode));
1686     }
1687 
1688     errorCode=U_ZERO_ERROR;
1689     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1690     if(U_FAILURE(errorCode)) {
1691         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1692         return;
1693     }
1694     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1695     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1696         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1697                 (int)length, u_errorName(errorCode));
1698     }
1699 }
1700 
1701 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1702