• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <time.h>
30 #include "unicode/uchar.h"
31 #include "unicode/ustring.h"
32 #include "unicode/unorm.h"
33 #include "cnormtst.h"
34 
/*
 * Forward declarations of the file-local test functions that addNormTest()
 * registers with the test framework.
 */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
static void TestAPICoverage(void);
70 
/*
 * Canonical normalization test vectors.
 *
 * Each row is { input, expected decomposed form, expected composed form }.
 * The strings are written with textual \uXXXX escapes and are converted with
 * CharsToUChars() by the code that consumes the table.
 * The last row is three empty strings.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII and a precomposed Latin letter */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above, precomposed and as D + dot_above */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D with dot_below and dot_above in various precomposed/decomposed orders */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_below + dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },  /* D-dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D + dot_above + dot_below */

    /* three marks: cedilla or ogonek together with the two dots */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla + dot_above + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D + dot_above + ogonek + dot_below */

    /* E with macron and grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },               /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },        /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron */

    /* angstrom sign versus A-ring */
    { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring */

    /* the U+FB03 ligature is left alone by the canonical forms */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* U+2163 is left alone by the canonical forms */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* Katakana, full-width and half-width */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },               /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },        /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten */

    /* the two marks are reordered in the decomposed form */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + grave + grave_below */

    { "", "", "" }
};
107 
/*
 * Compatibility normalization test vectors.
 *
 * Each row is { input, expected decomposed form, expected composed form },
 * written with textual \uXXXX escapes. The last row is three empty strings.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },        /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten */

    { "", "", "" }
};
130 
/*
 * FCD test vectors: { input, expected result, unused }.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 * The third column is NULL; TestNormCases() only reads column 1 for modes
 * other than NFC/NFKC.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
136 
137 void addNormTest(TestNode** root);
138 
addNormTest(TestNode ** root)139 void addNormTest(TestNode** root)
140 {
141     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
142     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
143     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
144     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
145     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
146     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
147     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
148     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
149     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
150     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
151     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
152     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
153     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
154     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
155     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
156     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
157     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
158     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
159     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
160     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
161     addTest(root, &TestAPICoverage, "tsnorm/cnormtst/TestAPICoverage");
162 }
163 
/*
 * Names used in log messages; TestNormCases() indexes this table directly
 * with its UNormalizationMode argument.
 * NOTE(review): presumably entry i names the mode whose numeric value is i -
 * confirm against the UNormalizationMode enum in unorm.h.
 */
static const char* const modeStrings[]={
    /* 0 */ "?",
    /* 1 */ "UNORM_NONE",
    /* 2 */ "UNORM_NFD",
    /* 3 */ "UNORM_NFKD",
    /* 4 */ "UNORM_NFC",
    /* 5 */ "UNORM_NFKC",
    /* 6 */ "UNORM_FCD",
    /* 7 */ "UNORM_MODE_COUNT"
};
174 
/*
 * Runs every row of a test-vector table through unorm_normalize() in the
 * given mode.
 *
 * mode           normalization mode under test (also indexes modeStrings[])
 * cases          table of { input, decomposed, composed } strings
 * lengthOfCases  number of rows in cases
 *
 * For each row the input is normalized four times: preflighting with an
 * explicit length, preflighting as a NUL-terminated string, and then into a
 * real buffer in both of those ways. The preflight lengths must agree, and
 * each real result is compared with column 2 (NFC/NFKC) or column 1 (all
 * other modes) of the row.
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    UChar normalized[16];
    int32_t expectedColumn;
    int32_t row;

    /* composing modes are checked against the third column, the rest against the second */
    if(mode==UNORM_NFC || mode==UNORM_NFKC) {
        expectedColumn=2;
    } else {
        expectedColumn=1;
    }

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(row=0; row<lengthOfCases; ++row) {
        UErrorCode status=U_ZERO_ERROR;
        UErrorCode nulPreflightStatus=U_ZERO_ERROR;
        UChar *source=CharsToUChars(cases[row][0]);
        int32_t sourceLength=u_strlen(source);
        int32_t preflightLength;
        int32_t outLength;

        /* preflight twice: explicit length, then NUL-terminated */
        preflightLength=unorm_normalize(source, sourceLength, mode, 0, NULL, 0, &status);
        outLength=unorm_normalize(source, -1, mode, 0, NULL, 0, &nulPreflightStatus);
        if(preflightLength!=outLength) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)row, (int)preflightLength, (int)outLength);
        }

        /* an overflow from preflighting is expected; any other status is kept */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status=U_ZERO_ERROR;
        }

        /* real normalization with an explicit source length */
        outLength=unorm_normalize(source, sourceLength, mode, 0,
                                  normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && preflightLength==outLength) {
            assertEqual(normalized, cases[row][expectedColumn], row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /*
         * real normalization of the NUL-terminated source; status is not reset
         * here, so a failure above is reported a second time
         */
        outLength=unorm_normalize(source, -1, mode, 0,
                                  normalized, UPRV_LENGTHOF(normalized), &status);
        if(U_SUCCESS(status) && preflightLength==outLength) {
            assertEqual(normalized, cases[row][expectedColumn], row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
214 
TestDecomp()215 void TestDecomp() {
216     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
217 }
218 
TestCompatDecomp()219 void TestCompatDecomp() {
220     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
221 }
222 
TestCanonDecompCompose()223 void TestCanonDecompCompose() {
224     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
225 }
226 
TestCompatDecompCompose()227 void TestCompatDecompCompose() {
228     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
229 }
230 
TestFCD()231 void TestFCD() {
232     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
233 }
234 
/*
 * Compares a normalization result with the expected text and logs an error
 * when they differ.
 *
 * result    NUL-terminated UChar string produced by the test
 * expected  expected text in char form; converted with CharsToUChars()
 * index     row number of the test case, used only in the message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *wanted = CharsToUChars(expected);
    int32_t difference = u_strcmp(result, wanted);

    if(difference != 0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n",
                index, expected, austrdup(result));
    }

    /* the converted copy belongs to this function */
    free(wanted);
}
244 
/*
 * Normalizes src (which may contain embedded U+0000) with an explicit length
 * and compares the output with exp.
 *
 * src, srcLen  input text and its length in UChars
 * exp, expLen  expected output and its length in UChars
 * mode         normalization mode to apply
 * name         mode name, used only in log messages
 *
 * The function returns right after reporting a failed call or a length
 * mismatch. Comparing element by element in those cases would index exp[]
 * (and, after a buffer overflow, result[]) with a length that does not belong
 * to them, and would end by logging "OK" for a failed check.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar   result[50];
    int32_t len, i;

    /* pre-fill the output so that untouched elements are recognizable */
    for(i=0;i<UPRV_LENGTHOF(result);i++)
      {
        result[i] = 0xFFFD;
      }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
      return;
    }
    if (len != expLen) {
      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, (int)expLen, (int)len);
      return;
    }

    /* len == expLen here, so both arrays are valid for every i < len */
    for(i=0;i<len;i++){
      if(exp[i] != result[i]) {
        log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                name,
                (int)i,
                (unsigned)exp[i],
                (unsigned)result[i]);
        return;
      }
      log_verbose("     %d: \\u%04X\n", (int)i, (unsigned)result[i]);
    }

    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
}
287 
TestNull()288 void TestNull()
289 {
290 
291     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
292     int32_t source_comp_len = 4;
293     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
294     int32_t expect_comp_len = 3;
295 
296     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
297     int32_t source_dcmp_len = 3;
298     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
299     int32_t expect_dcmp_len = 5;
300 
301     TestNull_check(source_comp,
302                    source_comp_len,
303                    expect_comp,
304                    expect_comp_len,
305                    UNORM_NFC,
306                    "UNORM_NFC");
307 
308     TestNull_check(source_dcmp,
309                    source_dcmp_len,
310                    expect_dcmp,
311                    expect_dcmp_len,
312                    UNORM_NFD,
313                    "UNORM_NFD");
314 
315     TestNull_check(source_comp,
316                    source_comp_len,
317                    expect_comp,
318                    expect_comp_len,
319                    UNORM_NFKC,
320                    "UNORM_NFKC");
321 
322 
323 }
324 
TestQuickCheckResultNO()325 static void TestQuickCheckResultNO()
326 {
327   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
328                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
329   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
330                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
331   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
332                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
333   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
334                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
335 
336 
337   const int SIZE = 10;
338 
339   int count = 0;
340   UErrorCode error = U_ZERO_ERROR;
341 
342   for (; count < SIZE; count ++)
343   {
344     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
345                                                               UNORM_NO)
346     {
347       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
348       return;
349     }
350     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
351                                                               UNORM_NO)
352     {
353       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
354       return;
355     }
356     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
357                                                               UNORM_NO)
358     {
359       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
360       return;
361     }
362     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
363                                                               UNORM_NO)
364     {
365       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
366       return;
367     }
368   }
369 }
370 
371 
TestQuickCheckResultYES()372 static void TestQuickCheckResultYES()
373 {
374   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
375                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
376   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
377                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
378   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
379                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
380   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
381                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
382 
383   const int SIZE = 10;
384   int count = 0;
385   UErrorCode error = U_ZERO_ERROR;
386 
387   UChar cp = 0;
388   while (cp < 0xA0)
389   {
390     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
391     {
392       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
393       return;
394     }
395     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
396                                                              UNORM_YES)
397     {
398       log_err("ERROR in NFC quick check at U+%04x\n", cp);
399       return;
400     }
401     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
402     {
403       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
404       return;
405     }
406     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
407                                                              UNORM_YES)
408     {
409       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
410       return;
411     }
412     cp ++;
413   }
414 
415   for (; count < SIZE; count ++)
416   {
417     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
418                                                              UNORM_YES)
419     {
420       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
421       return;
422     }
423     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
424                                                           != UNORM_YES)
425     {
426       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
427       return;
428     }
429     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
430                                                              UNORM_YES)
431     {
432       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
433       return;
434     }
435     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
436                                                              UNORM_YES)
437     {
438       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
439       return;
440     }
441   }
442 }
443 
TestQuickCheckResultMAYBE()444 static void TestQuickCheckResultMAYBE()
445 {
446   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
447                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
448   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
449                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
450 
451 
452   const int SIZE = 10;
453 
454   int count = 0;
455   UErrorCode error = U_ZERO_ERROR;
456 
457   /* NFD and NFKD does not have any MAYBE codepoints */
458   for (; count < SIZE; count ++)
459   {
460     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
461                                                            UNORM_MAYBE)
462     {
463       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
464       return;
465     }
466     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
467                                                            UNORM_MAYBE)
468     {
469       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
470       return;
471     }
472   }
473 }
474 
TestQuickCheckStringResult()475 static void TestQuickCheckStringResult()
476 {
477   int count;
478   UChar *d = NULL;
479   UChar *c = NULL;
480   UErrorCode error = U_ZERO_ERROR;
481 
482   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
483   {
484     d = CharsToUChars(canonTests[count][1]);
485     c = CharsToUChars(canonTests[count][2]);
486     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
487                                                             UNORM_YES)
488     {
489       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
490       free(d); free(c);
491       return;
492     }
493 
494     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
495                                                             UNORM_NO)
496     {
497       log_err("ERROR in NFC quick check for string at count %d\n", count);
498       free(d); free(c);
499       return;
500     }
501 
502     free(d);
503     free(c);
504   }
505 
506   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
507   {
508     d = CharsToUChars(compatTests[count][1]);
509     c = CharsToUChars(compatTests[count][2]);
510     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
511                                                             UNORM_YES)
512     {
513       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
514       free(d); free(c);
515       return;
516     }
517 
518     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
519                                                             UNORM_YES)
520     {
521       log_err("ERROR in NFKC quick check for string at count %d\n", count);
522       free(d); free(c);
523       return;
524     }
525 
526     free(d);
527     free(c);
528   }
529 }
530 
/*
 * Entry point for the quick-check tests: runs the NO, YES and MAYBE
 * per-code-unit checks and then the string checks, in that order.
 */
void TestQuickCheck(void)
{
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();
  TestQuickCheckStringResult();
}
538 
539 /*
540  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
541  * normalized, and some that are not.
542  * Here we pick some specific cases and test the C API.
543  */
TestIsNormalized(void)544 static void TestIsNormalized(void) {
545     static const UChar notNFC[][8]={            /* strings that are not in NFC */
546         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
547         { 0xfb1d, 0 },                          /* excluded from composition */
548         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
549         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
550     };
551     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
552         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
553         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
554         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
555     };
556 
557     int32_t i;
558     UErrorCode errorCode;
559 
560     /* API test */
561 
562     /* normal case with length>=0 (length -1 used for special cases below) */
563     errorCode=U_ZERO_ERROR;
564     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
565         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
566     }
567 
568     /* incoming U_FAILURE */
569     errorCode=U_TRUNCATED_CHAR_FOUND;
570     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
571     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
572         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
573     }
574 
575     /* NULL source */
576     errorCode=U_ZERO_ERROR;
577     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
578     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
579         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
580     }
581 
582     /* bad length */
583     errorCode=U_ZERO_ERROR;
584     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
585     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
586         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
587     }
588 
589     /* specific cases */
590     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
591         errorCode=U_ZERO_ERROR;
592         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
593             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
594         }
595         errorCode=U_ZERO_ERROR;
596         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
597             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
598         }
599     }
600     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
601         errorCode=U_ZERO_ERROR;
602         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
603             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
604         }
605     }
606 }
607 
TestCheckFCD()608 void TestCheckFCD()
609 {
610   UErrorCode status = U_ZERO_ERROR;
611   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
612                          0x0A};
613   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
614                           0x02B9, 0x0314, 0x0315, 0x0316};
615   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
616                          0x0050, 0x0730, 0x09EE, 0x1E10};
617 
618   static const UChar datastr[][5] =
619   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
620     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
621     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
622     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
623   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
624 
625   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
626                             0x6a,
627                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
628                             0xea,
629                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
630                             0x0307, 0x0308, 0x0309, 0x030a,
631                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
632                             0x0327, 0x0328, 0x0329, 0x032a,
633                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
634                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
635 
636   int count = 0;
637 
638   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
639     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
640   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
641     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
642   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
643     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
644 
645   if (U_FAILURE(status))
646     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
647 
648   while (count < 4)
649   {
650     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
651     if (U_FAILURE(status)) {
652       log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data set %d - (Are you missing data?)\n", count);
653       break;
654     }
655     else {
656       if (result[count] != fcdresult) {
657         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
658                  result[count]);
659       }
660     }
661     count ++;
662   }
663 
664   /* random checks of long strings */
665   status = U_ZERO_ERROR;
666   srand((unsigned)time( NULL ));
667 
668   for (count = 0; count < 50; count ++)
669   {
670     int size = 0;
671     UNormalizationCheckResult testresult = UNORM_YES;
672     UChar data[20];
673     UChar norm[100];
674     UChar nfd[100];
675     int normsize = 0;
676     int nfdsize = 0;
677 
678     while (size != 19) {
679       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
680       log_verbose("0x%x", data[size]);
681       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
682                                   norm + normsize, 100 - normsize, &status);
683       if (U_FAILURE(status)) {
684         log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data generation - (Are you missing data?)\n");
685         break;
686       }
687       size ++;
688     }
689     log_verbose("\n");
690 
691     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
692                               nfd, 100, &status);
693     if (U_FAILURE(status)) {
694       log_data_err("unorm_quickCheck(FCD) failed: exception occurred at normalized data generation - (Are you missing data?)\n");
695     }
696 
697     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
698       testresult = UNORM_NO;
699     }
700     if (testresult == UNORM_YES) {
701       log_verbose("result UNORM_YES\n");
702     }
703     else {
704       log_verbose("result UNORM_NO\n");
705     }
706 
707     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
708       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
709     }
710   }
711 }
712 
713 static void
TestAPI()714 TestAPI() {
715     static const UChar in[]={ 0x68, 0xe4 };
716     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
717     UErrorCode errorCode;
718     int32_t length;
719 
720     /* try preflighting */
721     errorCode=U_ZERO_ERROR;
722     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
723     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
724         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
725         return;
726     }
727 
728     errorCode=U_ZERO_ERROR;
729     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
730     if(U_FAILURE(errorCode)) {
731         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
732         return;
733     }
734     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
735         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
736         return;
737     }
738     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
739     if(U_FAILURE(errorCode)) {
740         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
741         return;
742     }
743     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
744     if(U_FAILURE(errorCode)) {
745         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
746         return;
747     }
748 }
749 
/* test cases to improve test code coverage: code points used to build the input */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00          = 0xac00,                  /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = HANGUL_AC00 + 14*28 + 3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c     /* cc=220 */
};
768 
769 static void
TestNormCoverage()770 TestNormCoverage() {
771     UChar input[1000], expect[1000], output[1000];
772     UErrorCode errorCode;
773     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
774 
775     /* create a long and nasty string with NFKC-unsafe characters */
776     inLength=0;
777 
778     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
779     input[inLength++]=HANGUL_KIYEOK;
780     input[inLength++]=HANGUL_WEO;
781     input[inLength++]=HANGUL_KIYEOK_SIOS;
782 
783     input[inLength++]=HANGUL_KIYEOK;
784     input[inLength++]=HANGUL_WEO;
785     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
786 
787     input[inLength++]=HANGUL_KIYEOK;
788     input[inLength++]=HANGUL_K_WEO;
789     input[inLength++]=HANGUL_KIYEOK_SIOS;
790 
791     input[inLength++]=HANGUL_KIYEOK;
792     input[inLength++]=HANGUL_K_WEO;
793     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
794 
795     input[inLength++]=HANGUL_K_KIYEOK;
796     input[inLength++]=HANGUL_WEO;
797     input[inLength++]=HANGUL_KIYEOK_SIOS;
798 
799     input[inLength++]=HANGUL_K_KIYEOK;
800     input[inLength++]=HANGUL_WEO;
801     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
802 
803     input[inLength++]=HANGUL_K_KIYEOK;
804     input[inLength++]=HANGUL_K_WEO;
805     input[inLength++]=HANGUL_KIYEOK_SIOS;
806 
807     input[inLength++]=HANGUL_K_KIYEOK;
808     input[inLength++]=HANGUL_K_WEO;
809     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
810 
811     /* Hangul LV with normal/compatibility Jamo T */
812     input[inLength++]=HANGUL_AC00;
813     input[inLength++]=HANGUL_KIYEOK_SIOS;
814 
815     input[inLength++]=HANGUL_AC00;
816     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
817 
818     /* compatibility Jamo L, V */
819     input[inLength++]=HANGUL_K_KIYEOK;
820     input[inLength++]=HANGUL_K_WEO;
821 
822     hangulPrefixLength=inLength;
823 
824     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
825     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
826     for(i=0; i<200; ++i) {
827         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
828         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
829         input[inLength++]=U16_LEAD(MUSICAL_STEM);
830         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
831     }
832 
833     /* (compatibility) Jamo L, T do not compose */
834     input[inLength++]=HANGUL_K_KIYEOK;
835     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
836 
837     /* quick checks */
838     errorCode=U_ZERO_ERROR;
839     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
840         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
841     }
842     errorCode=U_ZERO_ERROR;
843     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
844         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
845     }
846     errorCode=U_ZERO_ERROR;
847     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
848         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
849     }
850     errorCode=U_ZERO_ERROR;
851     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
852         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
853     }
854     errorCode=U_ZERO_ERROR;
855     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
856         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
857     }
858 
859     /* NFKC */
860     expectLength=0;
861     expect[expectLength++]=HANGUL_SYLLABLE;
862 
863     expect[expectLength++]=HANGUL_SYLLABLE;
864 
865     expect[expectLength++]=HANGUL_SYLLABLE;
866 
867     expect[expectLength++]=HANGUL_SYLLABLE;
868 
869     expect[expectLength++]=HANGUL_SYLLABLE;
870 
871     expect[expectLength++]=HANGUL_SYLLABLE;
872 
873     expect[expectLength++]=HANGUL_SYLLABLE;
874 
875     expect[expectLength++]=HANGUL_SYLLABLE;
876 
877     expect[expectLength++]=HANGUL_AC00+3;
878 
879     expect[expectLength++]=HANGUL_AC00+3;
880 
881     expect[expectLength++]=HANGUL_AC00+14*28;
882 
883     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
884     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
885     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
886     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
887     for(i=0; i<200; ++i) {
888         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
889         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
890     }
891     for(i=0; i<200; ++i) {
892         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
893         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
894     }
895 
896     expect[expectLength++]=HANGUL_KIYEOK;
897     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
898 
899     /* try destination overflow first */
900     errorCode=U_ZERO_ERROR;
901     preflightLength=unorm_normalize(input, inLength,
902                            UNORM_NFKC, 0,
903                            output, 100, /* too short */
904                            &errorCode);
905     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
906         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
907     }
908 
909     /* real NFKC */
910     errorCode=U_ZERO_ERROR;
911     length=unorm_normalize(input, inLength,
912                            UNORM_NFKC, 0,
913                            output, UPRV_LENGTHOF(output),
914                            &errorCode);
915     if(U_FAILURE(errorCode)) {
916         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
917     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
918         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
919         for(i=0; i<length; ++i) {
920             if(output[i]!=expect[i]) {
921                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
922                 break;
923             }
924         }
925     }
926     if(length!=preflightLength) {
927         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
928     }
929 
930     /* FCD */
931     u_memcpy(expect, input, hangulPrefixLength);
932     expectLength=hangulPrefixLength;
933 
934     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
935     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
936     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
937     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
938     for(i=0; i<200; ++i) {
939         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
940         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
941     }
942     for(i=0; i<200; ++i) {
943         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
944         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
945     }
946 
947     expect[expectLength++]=HANGUL_K_KIYEOK;
948     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
949 
950     errorCode=U_ZERO_ERROR;
951     length=unorm_normalize(input, inLength,
952                            UNORM_FCD, 0,
953                            output, UPRV_LENGTHOF(output),
954                            &errorCode);
955     if(U_FAILURE(errorCode)) {
956         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
957     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
958         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
959         for(i=0; i<length; ++i) {
960             if(output[i]!=expect[i]) {
961                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
962                 break;
963             }
964         }
965     }
966 }
967 
968 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
969 static void
TestConcatenate(void)970 TestConcatenate(void) {
971     /* "re + 'sume'" */
972     static const UChar
973     left[]={
974         0x72, 0x65, 0
975     },
976     right[]={
977         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
978     },
979     expect[]={
980         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
981     };
982 
983     UChar buffer[100];
984     UErrorCode errorCode;
985     int32_t length;
986 
987     /* left with length, right NUL-terminated */
988     errorCode=U_ZERO_ERROR;
989     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
990     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
991         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
992     }
993 
994     /* preflighting */
995     errorCode=U_ZERO_ERROR;
996     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
997     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
998         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
999     }
1000 
1001     buffer[2]=0x5555;
1002     errorCode=U_ZERO_ERROR;
1003     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1004     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1005         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1006     }
1007 
1008     /* enter with U_FAILURE */
1009     buffer[2]=0xaaaa;
1010     errorCode=U_UNEXPECTED_TOKEN;
1011     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1012     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1013         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1014     }
1015 
1016     /* illegal arguments */
1017     buffer[2]=0xaaaa;
1018     errorCode=U_ZERO_ERROR;
1019     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1020     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1021         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1022     }
1023 
1024     errorCode=U_ZERO_ERROR;
1025     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1026     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1027         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1028     }
1029 }
1030 
/* Code unit that separates the expected output pieces in the iteration test data. */
enum { _PLUS=0x2b };
1034 
1035 static const char *const _modeString[UNORM_MODE_COUNT]={
1036     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1037 };
1038 
1039 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1040 _testIter(const UChar *src, int32_t srcLength,
1041           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1042           const UChar *out, int32_t outLength,
1043           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1044     UChar buffer[4];
1045     const UChar *expect, *outLimit, *in;
1046     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1047     UErrorCode errorCode;
1048     UBool neededToNormalize, expectNeeded;
1049 
1050     errorCode=U_ZERO_ERROR;
1051     outLimit=out+outLength;
1052     if(forward) {
1053         expect=out;
1054         i=index=0;
1055     } else {
1056         expect=outLimit;
1057         i=srcIndexesLength-2;
1058         index=srcLength;
1059     }
1060 
1061     for(;;) {
1062         prevIndex=index;
1063         if(forward) {
1064             if(!iter->hasNext(iter)) {
1065                 return;
1066             }
1067             length=unorm_next(iter,
1068                               buffer, UPRV_LENGTHOF(buffer),
1069                               mode, 0,
1070                               (UBool)(out!=NULL), &neededToNormalize,
1071                               &errorCode);
1072             expectIndex=srcIndexes[i+1];
1073             in=src+prevIndex;
1074             inLength=expectIndex-prevIndex;
1075 
1076             if(out!=NULL) {
1077                 /* get output piece from between plus signs */
1078                 expectLength=0;
1079                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1080                     ++expectLength;
1081                 }
1082                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1083             } else {
1084                 expect=in;
1085                 expectLength=inLength;
1086                 expectNeeded=false;
1087             }
1088         } else {
1089             if(!iter->hasPrevious(iter)) {
1090                 return;
1091             }
1092             length=unorm_previous(iter,
1093                                   buffer, UPRV_LENGTHOF(buffer),
1094                                   mode, 0,
1095                                   (UBool)(out!=NULL), &neededToNormalize,
1096                                   &errorCode);
1097             expectIndex=srcIndexes[i];
1098             in=src+expectIndex;
1099             inLength=prevIndex-expectIndex;
1100 
1101             if(out!=NULL) {
1102                 /* get output piece from between plus signs */
1103                 expectLength=0;
1104                 while(expect!=out && expect[-1]!=_PLUS) {
1105                     ++expectLength;
1106                     --expect;
1107                 }
1108                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1109             } else {
1110                 expect=in;
1111                 expectLength=inLength;
1112                 expectNeeded=false;
1113             }
1114         }
1115         index=iter->getIndex(iter, UITER_CURRENT);
1116 
1117         if(U_FAILURE(errorCode)) {
1118             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1119                     forward, _modeString[mode], i, u_errorName(errorCode));
1120             return;
1121         }
1122         if(expectIndex!=index) {
1123             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1124                     forward, _modeString[mode], i, index, expectIndex);
1125             return;
1126         }
1127         if(expectLength!=length) {
1128             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1129                     forward, _modeString[mode], i, length, expectLength);
1130             return;
1131         }
1132         if(0!=u_memcmp(expect, buffer, length)) {
1133             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1134                     forward, _modeString[mode], i);
1135             return;
1136         }
1137         if(neededToNormalize!=expectNeeded) {
1138         }
1139 
1140         if(forward) {
1141             expect+=expectLength+1; /* go after the + */
1142             ++i;
1143         } else {
1144             --expect; /* go before the + */
1145             --i;
1146         }
1147     }
1148 }
1149 
1150 static void
TestNextPrevious()1151 TestNextPrevious() {
1152     static const UChar
1153     src[]={ /* input string */
1154         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1155     },
1156     nfd[]={ /* + separates expected output pieces */
1157         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1158     },
1159     nfkd[]={
1160         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1161     },
1162     nfc[]={
1163         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1164     },
1165     nfkc[]={
1166         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1167     },
1168     fcd[]={
1169         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1170     };
1171 
1172     /* expected iterator indexes in the source string for each iteration piece */
1173     static const int32_t
1174     nfdIndexes[]={
1175         0, 1, 2, 5, 6, 7
1176     },
1177     nfkdIndexes[]={
1178         0, 1, 2, 5, 6, 7
1179     },
1180     nfcIndexes[]={
1181         0, 1, 2, 5, 6, 7
1182     },
1183     nfkcIndexes[]={
1184         0, 1, 2, 5, 7
1185     },
1186     fcdIndexes[]={
1187         0, 1, 2, 5, 6, 7
1188     };
1189 
1190     UCharIterator iter;
1191 
1192     UChar buffer[4];
1193     int32_t length;
1194 
1195     UBool neededToNormalize;
1196     UErrorCode errorCode;
1197 
1198     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1199 
1200     /* test iteration with doNormalize */
1201     iter.index=0;
1202     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, true, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1203     iter.index=0;
1204     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, true, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1205     iter.index=0;
1206     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, true, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1207     iter.index=0;
1208     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, true, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1209     iter.index=0;
1210     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, true, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1211 
1212     iter.index=iter.length;
1213     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, false, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1214     iter.index=iter.length;
1215     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, false, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1216     iter.index=iter.length;
1217     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, false, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1218     iter.index=iter.length;
1219     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, false, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1220     iter.index=iter.length;
1221     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, false, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1222 
1223     /* test iteration without doNormalize */
1224     iter.index=0;
1225     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, true, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1226     iter.index=0;
1227     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, true, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1228     iter.index=0;
1229     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, true, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1230     iter.index=0;
1231     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, true, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1232     iter.index=0;
1233     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, true, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1234 
1235     iter.index=iter.length;
1236     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, false, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1237     iter.index=iter.length;
1238     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, false, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1239     iter.index=iter.length;
1240     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, false, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1241     iter.index=iter.length;
1242     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, false, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1243     iter.index=iter.length;
1244     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, false, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1245 
1246     /* try without neededToNormalize */
1247     errorCode=U_ZERO_ERROR;
1248     buffer[0]=5;
1249     iter.index=1;
1250     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1251                       UNORM_NFD, 0, true, NULL,
1252                       &errorCode);
1253     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1254         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1255         return;
1256     }
1257 
1258     /* preflight */
1259     neededToNormalize=9;
1260     iter.index=1;
1261     length=unorm_next(&iter, NULL, 0,
1262                       UNORM_NFD, 0, true, &neededToNormalize,
1263                       &errorCode);
1264     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=false || length!=2) {
1265         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1266         return;
1267     }
1268 
1269     errorCode=U_ZERO_ERROR;
1270     buffer[0]=buffer[1]=5;
1271     neededToNormalize=9;
1272     iter.index=1;
1273     length=unorm_next(&iter, buffer, 1,
1274                       UNORM_NFD, 0, true, &neededToNormalize,
1275                       &errorCode);
1276     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=false || length!=2 || buffer[1]!=5) {
1277         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1278         return;
1279     }
1280 
1281     /* no iterator */
1282     errorCode=U_ZERO_ERROR;
1283     buffer[0]=buffer[1]=5;
1284     neededToNormalize=9;
1285     iter.index=1;
1286     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1287                       UNORM_NFD, 0, true, &neededToNormalize,
1288                       &errorCode);
1289     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1290         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1291         return;
1292     }
1293 
1294     /* illegal mode */
1295     buffer[0]=buffer[1]=5;
1296     neededToNormalize=9;
1297     iter.index=1;
1298     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1299                       (UNormalizationMode)0, 0, true, &neededToNormalize,
1300                       &errorCode);
1301     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1302         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1303         return;
1304     }
1305 
1306     /* error coming in */
1307     errorCode=U_MISPLACED_QUANTIFIER;
1308     buffer[0]=5;
1309     iter.index=1;
1310     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1311                       UNORM_NFD, 0, true, NULL,
1312                       &errorCode);
1313     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1314         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1315         return;
1316     }
1317 }
1318 
1319 static void
TestFCNFKCClosure(void)1320 TestFCNFKCClosure(void) {
1321     static const struct {
1322         UChar32 c;
1323         const UChar s[6];
1324     } tests[]={
1325         { 0x00C4, { 0 } },
1326         { 0x00E4, { 0 } },
1327         { 0x037A, { 0x0020, 0x03B9, 0 } },
1328         { 0x03D2, { 0x03C5, 0 } },
1329         { 0x20A8, { 0x0072, 0x0073, 0 } },
1330         { 0x210B, { 0x0068, 0 } },
1331         { 0x210C, { 0x0068, 0 } },
1332         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1333         { 0x2122, { 0x0074, 0x006D, 0 } },
1334         { 0x2128, { 0x007A, 0 } },
1335         { 0x1D5DB, { 0x0068, 0 } },
1336         { 0x1D5ED, { 0x007A, 0 } },
1337         { 0x0061, { 0 } }
1338     };
1339 
1340     UChar buffer[8];
1341     UErrorCode errorCode;
1342     int32_t i, length;
1343 
1344     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1345         errorCode=U_ZERO_ERROR;
1346         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1347         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1348             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1349         }
1350     }
1351 
1352     /* error handling */
1353     errorCode=U_ZERO_ERROR;
1354     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1355     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1356         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1357     }
1358 
1359     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1360     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1361         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1362     }
1363 }
1364 
1365 static void
TestQuickCheckPerCP()1366 TestQuickCheckPerCP() {
1367     UErrorCode errorCode;
1368     UChar32 c, lead, trail;
1369     UChar s[U16_MAX_LENGTH], nfd[16];
1370     int32_t length, lccc1, lccc2, tccc1, tccc2;
1371     int32_t qc1, qc2;
1372 
1373     if(
1374         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1375         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1376         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1377         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1378         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1379         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1380     ) {
1381         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1382     }
1383 
1384     /*
1385      * compare the quick check property values for some code points
1386      * to the quick check results for checking same-code point strings
1387      */
1388     errorCode=U_ZERO_ERROR;
1389     c=0;
1390     while(c<0x110000) {
1391         length=0;
1392         U16_APPEND_UNSAFE(s, length, c);
1393 
1394         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1395         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1396         if(qc1!=qc2) {
1397             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1398         }
1399 
1400         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1401         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1402         if(qc1!=qc2) {
1403             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1404         }
1405 
1406         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1407         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1408         if(qc1!=qc2) {
1409             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1410         }
1411 
1412         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1413         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1414         if(qc1!=qc2) {
1415             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1416         }
1417 
1418         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1419         if (U_FAILURE(errorCode)) {
1420             log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1421             break;
1422         }
1423 
1424         /* length-length == 0 is used to get around a compiler warning. */
1425         U16_GET(nfd, 0, length-length, length, lead);
1426         U16_GET(nfd, 0, length-1, length, trail);
1427 
1428         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1429         lccc2=u_getCombiningClass(lead);
1430         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1431         tccc2=u_getCombiningClass(trail);
1432 
1433         if(lccc1!=lccc2) {
1434             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1435                     lccc1, lccc2, c);
1436         }
1437         if(tccc1!=tccc2) {
1438             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1439                     tccc1, tccc2, c);
1440         }
1441 
1442         /* skip some code points */
1443         c=(20*c)/19+1;
1444     }
1445 }
1446 
1447 static void
TestComposition(void)1448 TestComposition(void) {
1449     static const struct {
1450         UNormalizationMode mode;
1451         uint32_t options;
1452         UChar input[12];
1453         UChar expect[12];
1454     } cases[]={
1455         /*
1456          * special cases for UAX #15 bug
1457          * see Unicode Corrigendum #5: Normalization Idempotency
1458          * at http://unicode.org/versions/corrigendum5.html
1459          * (was Public Review Issue #29)
1460          */
1461         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1462         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1463         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1464         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1465 
1466         /* TODO: add test cases for UNORM_FCC here (j2151) */
1467     };
1468 
1469     UChar output[16];
1470     UErrorCode errorCode;
1471     int32_t i, length;
1472 
1473     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1474         errorCode=U_ZERO_ERROR;
1475         length=unorm_normalize(
1476                     cases[i].input, -1,
1477                     cases[i].mode, cases[i].options,
1478                     output, UPRV_LENGTHOF(output),
1479                     &errorCode);
1480         if( U_FAILURE(errorCode) ||
1481             length!=u_strlen(cases[i].expect) ||
1482             0!=u_memcmp(output, cases[i].expect, length)
1483         ) {
1484             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1485         }
1486     }
1487 }
1488 
1489 static void
TestGetDecomposition()1490 TestGetDecomposition() {
1491     UChar decomp[32];
1492     int32_t length;
1493 
1494     UErrorCode errorCode=U_ZERO_ERROR;
1495     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1496     if(U_FAILURE(errorCode)) {
1497         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1498         return;
1499     }
1500 
1501     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1502     if(U_FAILURE(errorCode) || length>=0) {
1503         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1504     }
1505     errorCode=U_ZERO_ERROR;
1506     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1507     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1508         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1509     }
1510     errorCode=U_ZERO_ERROR;
1511     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1512     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1513         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1514     }
1515     errorCode=U_ZERO_ERROR;
1516     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1517     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1518         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1519     }
1520     errorCode=U_ZERO_ERROR;
1521     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1522     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1523         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1524     }
1525     errorCode=U_ZERO_ERROR;
1526     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1527     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1528         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1529     }
1530 }
1531 
1532 static void
TestGetRawDecomposition()1533 TestGetRawDecomposition() {
1534     UChar decomp[32];
1535     int32_t length;
1536 
1537     UErrorCode errorCode=U_ZERO_ERROR;
1538     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1539     if(U_FAILURE(errorCode)) {
1540         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1541         return;
1542     }
1543     /*
1544      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1545      * without recursive decomposition.
1546      */
1547 
1548     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1549     if(U_FAILURE(errorCode) || length>=0) {
1550         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1551     }
1552     errorCode=U_ZERO_ERROR;
1553     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1554     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1555         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1556     }
1557     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1558     errorCode=U_ZERO_ERROR;
1559     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1560     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1561         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1562     }
1563     /* U+212B ANGSTROM SIGN */
1564     errorCode=U_ZERO_ERROR;
1565     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1566     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1567         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1568     }
1569     errorCode=U_ZERO_ERROR;
1570     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1571     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1572         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1573     }
1574     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1575     errorCode=U_ZERO_ERROR;
1576     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1577     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1578         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1579     }
1580     errorCode=U_ZERO_ERROR;
1581     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1582     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1583         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1584     }
1585     errorCode=U_ZERO_ERROR;
1586     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1587     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1588         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1589     }
1590     errorCode=U_ZERO_ERROR;
1591     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1592     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1593         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1594     }
1595 }
1596 
1597 static void
TestAppendRestoreMiddle()1598 TestAppendRestoreMiddle() {
1599     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1600     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1601     /* NFC: C5 is 'A with ring above' */
1602     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1603     int32_t length;
1604     UErrorCode errorCode=U_ZERO_ERROR;
1605     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1606     if(U_FAILURE(errorCode)) {
1607         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1608         return;
1609     }
1610     /*
1611      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1612      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1613      * still fits into a[] but the full result still overflows this capacity.
1614      * (Let it modify the destination buffer before reallocating internally.)
1615      */
1616     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1617     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1618         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1619         return;
1620     }
1621     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1622     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1623         log_err("unorm2_append(overflow) modified the first string\n");
1624         return;
1625     }
1626     errorCode=U_ZERO_ERROR;
1627     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1628     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1629         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1630         return;
1631     }
1632 }
1633 
1634 static void
TestGetEasyToUseInstance()1635 TestGetEasyToUseInstance() {
1636     static const UChar in[]={
1637         0xA0,  /* -> <noBreak> 0020 */
1638         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1639     };
1640     UChar out[32];
1641     int32_t length;
1642 
1643     UErrorCode errorCode=U_ZERO_ERROR;
1644     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1645     if(U_FAILURE(errorCode)) {
1646         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1647         return;
1648     }
1649     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1650     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1651         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1652                 (int)length, u_errorName(errorCode));
1653     }
1654 
1655     errorCode=U_ZERO_ERROR;
1656     n2=unorm2_getNFDInstance(&errorCode);
1657     if(U_FAILURE(errorCode)) {
1658         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1659         return;
1660     }
1661     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1662     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1663         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1664                 (int)length, u_errorName(errorCode));
1665     }
1666 
1667     errorCode=U_ZERO_ERROR;
1668     n2=unorm2_getNFKCInstance(&errorCode);
1669     if(U_FAILURE(errorCode)) {
1670         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1671         return;
1672     }
1673     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1674     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1675         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1676                 (int)length, u_errorName(errorCode));
1677     }
1678 
1679     errorCode=U_ZERO_ERROR;
1680     n2=unorm2_getNFKDInstance(&errorCode);
1681     if(U_FAILURE(errorCode)) {
1682         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1683         return;
1684     }
1685     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1686     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1687         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1688                 (int)length, u_errorName(errorCode));
1689     }
1690 
1691     errorCode=U_ZERO_ERROR;
1692     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1693     if(U_FAILURE(errorCode)) {
1694         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1695         return;
1696     }
1697     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1698     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1699         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1700                 (int)length, u_errorName(errorCode));
1701     }
1702 }
1703 
1704 static void
TestAPICoverage()1705 TestAPICoverage() {
1706     UErrorCode errorCode = U_ZERO_ERROR;
1707     const UNormalizer2 *n2 = unorm2_getNFDInstance(&errorCode);
1708     if (U_FAILURE(errorCode)) {
1709         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1710         return;
1711     }
1712 
1713     if (!unorm2_hasBoundaryBefore(n2, u'C') || unorm2_hasBoundaryBefore(n2, 0x300)) {
1714         log_err("unorm2_hasBoundaryBefore() returns unexpected results\n");
1715     }
1716 
1717     if (!unorm2_hasBoundaryAfter(n2, u'C') || unorm2_hasBoundaryAfter(n2, 0x300)) {
1718         log_err("unorm2_hasBoundaryAfter() returns unexpected results\n");
1719     }
1720 
1721     if (!unorm2_isInert(n2, 0x50005) || unorm2_isInert(n2, 0x300)) {
1722         log_err("unorm2_isInert() returns unexpected results\n");
1723     }
1724 
1725     errorCode = U_ZERO_ERROR;
1726     if (!unorm2_isNormalized(n2, u"c\u0327\u0300", 3, &errorCode) ||
1727             unorm2_isNormalized(n2, u"c\u0300\u0327", 3, &errorCode) ||
1728             U_FAILURE(errorCode)) {
1729         log_err("unorm2_isNormalized() returns unexpected results\n");
1730     }
1731 
1732     errorCode = U_ZERO_ERROR;
1733     if (unorm2_quickCheck(n2, u"c\u0327\u0300", 3, &errorCode) == UNORM_NO ||
1734             unorm2_quickCheck(n2, u"c\u0300\u0327", 3, &errorCode) == UNORM_YES ||
1735             U_FAILURE(errorCode)) {
1736         log_err("unorm2_quickCheck() returns unexpected results\n");
1737     }
1738 
1739     errorCode = U_ZERO_ERROR;
1740     if (unorm2_spanQuickCheckYes(n2, u"c\u0327\u0300", 3, &errorCode) != 3 ||
1741             unorm2_spanQuickCheckYes(n2, u"c\u0300\u0327", 3, &errorCode) != 1 ||
1742             U_FAILURE(errorCode)) {
1743         log_err("unorm2_spanQuickCheckYes() returns unexpected results\n");
1744     }
1745 
1746     errorCode = U_ZERO_ERROR;
1747     UChar first[10] = { u'c', 0x300, 0, 0, 0, 0, 0, 0, 0, 0 };
1748     int32_t length = unorm2_normalizeSecondAndAppend(
1749         n2, first, 2, UPRV_LENGTHOF(first), u"\u0327d", 2, &errorCode);
1750     if (U_FAILURE(errorCode) || length != 4 || u_strcmp(first, u"c\u0327\u0300d") != 0) {
1751         log_err("unorm2_normalizeSecondAndAppend() returns unexpected results\n");
1752     }
1753 }
1754 
1755 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1756