• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33 
/*
 * Forward declarations of the file-local test functions that
 * addNormTest() registers with the test framework.
 */
static void
TestAPI(void);

static void
TestNormCoverage(void);

static void
TestConcatenate(void);

static void
TestNextPrevious(void);

static void TestIsNormalized(void);

static void
TestFCNFKCClosure(void);

static void
TestQuickCheckPerCP(void);

static void
TestComposition(void);

static void
TestFCD(void);

static void
TestGetDecomposition(void);

static void
TestGetRawDecomposition(void);

static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
static void TestAPICoverage(void);
69 
/*
 * Canonical normalization test vectors.
 * Each row is { input, expected decomposed form, expected composed form }.
 * TestNormCases() converts the input with CharsToUChars() and compares the
 * normalization result with column 1 (non-composing modes) or column 2
 * (UNORM_NFC/UNORM_NFKC). The last row consists of empty strings.
 */
static const char* const canonTests[][3] = {
    /* Input*/                    /*Decomposed*/                /*Composed*/
    { "cat",                    "cat",                        "cat"                    },
    { "\\u00e0ardvark",            "a\\u0300ardvark",            "\\u00e0ardvark",        },

    { "\\u1e0a",                "D\\u0307",                    "\\u1e0a"                }, /* D-dot_above*/
    { "D\\u0307",                "D\\u0307",                    "\\u1e0a"                }, /* D dot_above*/

    { "\\u1e0c\\u0307",            "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D-dot_below dot_above*/
    { "\\u1e0a\\u0323",            "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323",        "D\\u0323\\u0307",            "\\u1e0c\\u0307"        }, /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323",    "D\\u0327\\u0323\\u0307",    "\\u1e10\\u0323\\u0307"    }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323",    "D\\u0328\\u0323\\u0307",    "\\u1e0c\\u0328\\u0307"    }, /* D dot_above ogonek dot_below*/

    { "\\u1E14",                "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron-grave*/
    { "\\u0112\\u0300",            "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron + grave*/
    { "\\u00c8\\u0304",            "E\\u0300\\u0304",            "\\u00c8\\u0304"        }, /* E-grave + macron*/

    { "\\u212b",                "A\\u030a",                    "\\u00c5"                }, /* angstrom_sign*/
    { "\\u00c5",                "A\\u030a",                    "\\u00c5"                }, /* A-ring*/

    { "\\u00C4ffin",            "A\\u0308ffin",                "\\u00C4ffin"                    },
    { "\\u00C4\\uFB03n",        "A\\u0308\\uFB03n",            "\\u00C4\\uFB03n"                },

    { "Henry IV",                "Henry IV",                    "Henry IV"                },
    { "Henry \\u2163",            "Henry \\u2163",            "Henry \\u2163"            },

    { "\\u30AC",                "\\u30AB\\u3099",            "\\u30AC"                }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",            "\\u30AB\\u3099",            "\\u30AC"                }, /*ka + ten*/
    { "\\uFF76\\uFF9E",            "\\uFF76\\uFF9E",            "\\uFF76\\uFF9E"        }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",            "\\u30AB\\uFF9E",            "\\u30AB\\uFF9E"        }, /* ka + hw_ten*/
    { "\\uFF76\\u3099",            "\\uFF76\\u3099",            "\\uFF76\\u3099"        },  /* hw_ka + ten*/
    { "A\\u0300\\u0316",           "A\\u0316\\u0300",           "\\u00C0\\u0316"        },  /* A + grave + grave_below (marks get reordered) */
    { "", "", "" }
};
106 
/*
 * Compatibility normalization test vectors, laid out like canonTests:
 * { input, expected decomposed form, expected composed form }.
 * Used in this file with UNORM_NFKD and UNORM_NFKC (see TestCompatDecomp()
 * and TestCompatDecompCompose()). The last row consists of empty strings.
 */
static const char* const compatTests[][3] = {
    /* Input*/                        /*Decomposed    */                /*Composed*/
    { "cat",                        "cat",                            "cat"                },

    { "\\uFB4f",                    "\\u05D0\\u05DC",                "\\u05D0\\u05DC"    }, /* Alef-Lamed vs. Alef, Lamed*/

    { "\\u00C4ffin",                "A\\u0308ffin",                    "\\u00C4ffin"             },
    { "\\u00C4\\uFB03n",            "A\\u0308ffin",                    "\\u00C4ffin"                }, /* ffi ligature -> f + f + i*/

    { "Henry IV",                    "Henry IV",                        "Henry IV"            },
    { "Henry \\u2163",                "Henry IV",                        "Henry IV"            },

    { "\\u30AC",                    "\\u30AB\\u3099",                "\\u30AC"            }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",                "\\u30AB\\u3099",                "\\u30AC"            }, /*ka + ten*/

    { "\\uFF76\\u3099",                "\\u30AB\\u3099",                "\\u30AC"            }, /* hw_ka + ten*/

    /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    { "\\uFF76\\uFF9E",                "\\u30AB\\u3099",                "\\u30AC"            }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",                "\\u30AB\\u3099",                "\\u30AC"            }, /* ka + hw_ten*/
    { "", "", "" }
};
129 
/*
 * Test vectors for UNORM_FCD: { input, expected result, unused }.
 * The third column is NULL; TestNormCases() only reads column 2 for
 * UNORM_NFC/UNORM_NFKC, so it is never dereferenced when run with UNORM_FCD.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },  /* D-caron + cedilla */
    { "\\u010e", "\\u010e", NULL }  /* D-caron */
};
135 
136 void addNormTest(TestNode** root);
137 
/*
 * Registers every normalization test of this file under the
 * "tsnorm/cnormtst/" path of the given test tree.
 */
void addNormTest(TestNode** root)
{
    /* basic API and table-driven normalization tests */
    addTest(root, TestAPI,                  "tsnorm/cnormtst/TestAPI");
    addTest(root, TestDecomp,               "tsnorm/cnormtst/TestDecomp");
    addTest(root, TestCompatDecomp,         "tsnorm/cnormtst/TestCompatDecomp");
    addTest(root, TestCanonDecompCompose,   "tsnorm/cnormtst/TestCanonDecompCompose");
    addTest(root, TestCompatDecompCompose,  "tsnorm/cnormtst/TestCompatDecompCompose");
    addTest(root, TestFCD,                  "tsnorm/cnormtst/TestFCD");
    addTest(root, TestNull,                 "tsnorm/cnormtst/TestNull");

    /* quick check / isNormalized / FCD checks */
    addTest(root, TestQuickCheck,           "tsnorm/cnormtst/TestQuickCheck");
    addTest(root, TestQuickCheckPerCP,      "tsnorm/cnormtst/TestQuickCheckPerCP");
    addTest(root, TestIsNormalized,         "tsnorm/cnormtst/TestIsNormalized");
    addTest(root, TestCheckFCD,             "tsnorm/cnormtst/TestCheckFCD");

    /* remaining tests */
    addTest(root, TestNormCoverage,         "tsnorm/cnormtst/TestNormCoverage");
    addTest(root, TestConcatenate,          "tsnorm/cnormtst/TestConcatenate");
    addTest(root, TestNextPrevious,         "tsnorm/cnormtst/TestNextPrevious");
    addTest(root, TestFCNFKCClosure,        "tsnorm/cnormtst/TestFCNFKCClosure");
    addTest(root, TestComposition,          "tsnorm/cnormtst/TestComposition");
    addTest(root, TestGetDecomposition,     "tsnorm/cnormtst/TestGetDecomposition");
    addTest(root, TestGetRawDecomposition,  "tsnorm/cnormtst/TestGetRawDecomposition");
    addTest(root, TestAppendRestoreMiddle,  "tsnorm/cnormtst/TestAppendRestoreMiddle");
    addTest(root, TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
    addTest(root, TestAPICoverage,          "tsnorm/cnormtst/TestAPICoverage");
}
162 
/*
 * Printable names for log messages; TestNormCases() indexes this table
 * directly with a UNormalizationMode value. Entry 0 is a "?" placeholder.
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
173 
/*
 * Runs unorm_normalize() in the given mode over every row of a test table.
 *
 * For each row the input (column 0) is normalized four times: two preflight
 * calls (explicit length and NUL-terminated) whose lengths must agree, and
 * two calls into a local buffer (explicit length and NUL-terminated) whose
 * output is compared with the expected column of the row.
 *
 * mode           normalization mode to test
 * cases          table of { input, decomposed, composed } strings
 * lengthOfCases  number of rows in cases
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    /* composing modes are checked against column 2, all others against column 1 */
    const int32_t expIndex = (mode!=UNORM_NFC && mode!=UNORM_NFKC) ? 1 : 2;
    UChar result[16];
    int32_t x;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(x=0; x<lengthOfCases; ++x) {
        UErrorCode status = U_ZERO_ERROR;
        UErrorCode status2 = U_ZERO_ERROR;
        UChar *source = CharsToUChars(cases[x][0]);
        const char *expected = cases[x][expIndex];
        int32_t neededLen, length2;

        /* preflight with explicit source length and with NUL termination */
        neededLen = unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2 = unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(length2!=neededLen) {
          log_err("ERROR in unorm_normalize(%s)[%d]: "
                  "preflight length/srcLength %d!=%d preflight length/NUL\n",
                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }

        /* an overflow from the preflight call is expected; clear it */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status=U_ZERO_ERROR;
        }

        /* real normalization, explicit source length */
        length2 = unorm_normalize(source, u_strlen(source), mode, 0,
                                  result, UPRV_LENGTHOF(result), &status);
        if(!U_FAILURE(status) && neededLen==length2) {
            assertEqual(result, expected, x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* real normalization, NUL-terminated source */
        length2 = unorm_normalize(source, -1, mode, 0,
                                  result, UPRV_LENGTHOF(result), &status);
        if(!U_FAILURE(status) && neededLen==length2) {
            assertEqual(result, expected, x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
213 
TestDecomp()214 void TestDecomp() {
215     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
216 }
217 
TestCompatDecomp()218 void TestCompatDecomp() {
219     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
220 }
221 
TestCanonDecompCompose()222 void TestCanonDecompCompose() {
223     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
224 }
225 
TestCompatDecompCompose()226 void TestCompatDecompCompose() {
227     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
228 }
229 
TestFCD()230 void TestFCD() {
231     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
232 }
233 
/*
 * Compares a normalization result with its expected value and logs an
 * error when they differ.
 *
 * result    NUL-terminated UTF-16 string produced by the test
 * expected  expected value as a char string; converted with CharsToUChars()
 * index     row number of the test case, used only in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUni = CharsToUChars(expected);
    const int32_t cmp = u_strcmp(result, expectedUni);

    /* the converted copy is only needed for the comparison */
    free(expectedUni);

    if(cmp!=0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
            austrdup(result) );
    }
}
243 
/*
 * Normalizes a string that contains an embedded U+0000 and checks the
 * output against the expected code units.
 *
 * src, srcLen  input string and its explicit length
 * exp, expLen  expected output and its length
 * mode         normalization mode to apply
 * name         printable name of the mode, used in log messages
 *
 * The comparison is skipped when normalization fails or returns an
 * unexpected length: in those cases the returned length is not a safe
 * bound for indexing exp[] or the local result buffer.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    enum { RESULT_CAPACITY = 50 };

    UErrorCode status = U_ZERO_ERROR;
    int32_t len, i;

    UChar   result[RESULT_CAPACITY];

    /* pre-fill with U+FFFD so that untouched slots are recognizable */
    for(i=0;i<RESULT_CAPACITY;i++)
      {
        result[i] = 0xFFFD;
      }

    len = unorm_normalize(src, srcLen, mode, 0, result, RESULT_CAPACITY, &status);

    if(U_FAILURE(status)) {
      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
      return;
    }
    if (len != expLen) {
      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
      return;
    }

    /* here len == expLen, so both arrays are valid for indices 0..len-1 */
    for(i=0;i<len;i++){
      if(exp[i] != result[i]) {
        log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                name,
                i,
                exp[i],
                result[i]);
        return;
      }
      log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
}
286 
TestNull()287 void TestNull()
288 {
289 
290     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
291     int32_t source_comp_len = 4;
292     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
293     int32_t expect_comp_len = 3;
294 
295     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
296     int32_t source_dcmp_len = 3;
297     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
298     int32_t expect_dcmp_len = 5;
299 
300     TestNull_check(source_comp,
301                    source_comp_len,
302                    expect_comp,
303                    expect_comp_len,
304                    UNORM_NFC,
305                    "UNORM_NFC");
306 
307     TestNull_check(source_dcmp,
308                    source_dcmp_len,
309                    expect_dcmp,
310                    expect_dcmp_len,
311                    UNORM_NFD,
312                    "UNORM_NFD");
313 
314     TestNull_check(source_comp,
315                    source_comp_len,
316                    expect_comp,
317                    expect_comp_len,
318                    UNORM_NFKC,
319                    "UNORM_NFKC");
320 
321 
322 }
323 
TestQuickCheckResultNO()324 static void TestQuickCheckResultNO()
325 {
326   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
327                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
328   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
329                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
330   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
331                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
333                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
334 
335 
336   const int SIZE = 10;
337 
338   int count = 0;
339   UErrorCode error = U_ZERO_ERROR;
340 
341   for (; count < SIZE; count ++)
342   {
343     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
344                                                               UNORM_NO)
345     {
346       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
347       return;
348     }
349     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
350                                                               UNORM_NO)
351     {
352       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
353       return;
354     }
355     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
356                                                               UNORM_NO)
357     {
358       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
359       return;
360     }
361     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
362                                                               UNORM_NO)
363     {
364       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
365       return;
366     }
367   }
368 }
369 
370 
TestQuickCheckResultYES()371 static void TestQuickCheckResultYES()
372 {
373   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
374                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
375   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
376                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
377   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
378                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
379   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
380                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
381 
382   const int SIZE = 10;
383   int count = 0;
384   UErrorCode error = U_ZERO_ERROR;
385 
386   UChar cp = 0;
387   while (cp < 0xA0)
388   {
389     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
390     {
391       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
392       return;
393     }
394     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
395                                                              UNORM_YES)
396     {
397       log_err("ERROR in NFC quick check at U+%04x\n", cp);
398       return;
399     }
400     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
401     {
402       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
403       return;
404     }
405     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
406                                                              UNORM_YES)
407     {
408       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
409       return;
410     }
411     cp ++;
412   }
413 
414   for (; count < SIZE; count ++)
415   {
416     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
417                                                              UNORM_YES)
418     {
419       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
420       return;
421     }
422     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
423                                                           != UNORM_YES)
424     {
425       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
426       return;
427     }
428     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
429                                                              UNORM_YES)
430     {
431       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
432       return;
433     }
434     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
435                                                              UNORM_YES)
436     {
437       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
438       return;
439     }
440   }
441 }
442 
TestQuickCheckResultMAYBE()443 static void TestQuickCheckResultMAYBE()
444 {
445   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
446                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
447   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
448                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
449 
450 
451   const int SIZE = 10;
452 
453   int count = 0;
454   UErrorCode error = U_ZERO_ERROR;
455 
456   /* NFD and NFKD does not have any MAYBE codepoints */
457   for (; count < SIZE; count ++)
458   {
459     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
460                                                            UNORM_MAYBE)
461     {
462       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
463       return;
464     }
465     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
466                                                            UNORM_MAYBE)
467     {
468       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
469       return;
470     }
471   }
472 }
473 
TestQuickCheckStringResult()474 static void TestQuickCheckStringResult()
475 {
476   int count;
477   UChar *d = NULL;
478   UChar *c = NULL;
479   UErrorCode error = U_ZERO_ERROR;
480 
481   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
482   {
483     d = CharsToUChars(canonTests[count][1]);
484     c = CharsToUChars(canonTests[count][2]);
485     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
486                                                             UNORM_YES)
487     {
488       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
489       free(d); free(c);
490       return;
491     }
492 
493     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
494                                                             UNORM_NO)
495     {
496       log_err("ERROR in NFC quick check for string at count %d\n", count);
497       free(d); free(c);
498       return;
499     }
500 
501     free(d);
502     free(c);
503   }
504 
505   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
506   {
507     d = CharsToUChars(compatTests[count][1]);
508     c = CharsToUChars(compatTests[count][2]);
509     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
510                                                             UNORM_YES)
511     {
512       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
513       free(d); free(c);
514       return;
515     }
516 
517     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
518                                                             UNORM_YES)
519     {
520       log_err("ERROR in NFKC quick check for string at count %d\n", count);
521       free(d); free(c);
522       return;
523     }
524 
525     free(d);
526     free(c);
527   }
528 }
529 
/*
 * Entry point for the unorm_quickCheck() tests: runs the NO, YES and MAYBE
 * single-code-point checks followed by the whole-string checks.
 */
void TestQuickCheck(void)
{
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();
  TestQuickCheckStringResult();
}
537 
538 /*
539  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
540  * normalized, and some that are not.
541  * Here we pick some specific cases and test the C API.
542  */
TestIsNormalized(void)543 static void TestIsNormalized(void) {
544     static const UChar notNFC[][8]={            /* strings that are not in NFC */
545         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
546         { 0xfb1d, 0 },                          /* excluded from composition */
547         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
548         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
549     };
550     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
551         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
552         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
553         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
554     };
555 
556     int32_t i;
557     UErrorCode errorCode;
558 
559     /* API test */
560 
561     /* normal case with length>=0 (length -1 used for special cases below) */
562     errorCode=U_ZERO_ERROR;
563     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
564         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
565     }
566 
567     /* incoming U_FAILURE */
568     errorCode=U_TRUNCATED_CHAR_FOUND;
569     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
570     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
571         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
572     }
573 
574     /* NULL source */
575     errorCode=U_ZERO_ERROR;
576     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
577     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
578         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
579     }
580 
581     /* bad length */
582     errorCode=U_ZERO_ERROR;
583     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
584     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
585         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
586     }
587 
588     /* specific cases */
589     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
590         errorCode=U_ZERO_ERROR;
591         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
592             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
593         }
594         errorCode=U_ZERO_ERROR;
595         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
596             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
597         }
598     }
599     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
600         errorCode=U_ZERO_ERROR;
601         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
602             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
603         }
604     }
605 }
606 
TestCheckFCD()607 void TestCheckFCD()
608 {
609   UErrorCode status = U_ZERO_ERROR;
610   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
611                          0x0A};
612   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
613                           0x02B9, 0x0314, 0x0315, 0x0316};
614   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
615                          0x0050, 0x0730, 0x09EE, 0x1E10};
616 
617   static const UChar datastr[][5] =
618   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
619     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
620     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
621     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
622   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
623 
624   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
625                             0x6a,
626                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
627                             0xea,
628                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
629                             0x0307, 0x0308, 0x0309, 0x030a,
630                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
631                             0x0327, 0x0328, 0x0329, 0x032a,
632                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
633                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
634 
635   int count = 0;
636 
637   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
638     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
639   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
640     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
641   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
642     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
643 
644   if (U_FAILURE(status))
645     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
646 
647   while (count < 4)
648   {
649     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
650     if (U_FAILURE(status)) {
651       log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data set %d - (Are you missing data?)\n", count);
652       break;
653     }
654     else {
655       if (result[count] != fcdresult) {
656         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
657                  result[count]);
658       }
659     }
660     count ++;
661   }
662 
663   /* random checks of long strings */
664   status = U_ZERO_ERROR;
665   srand((unsigned)time( NULL ));
666 
667   for (count = 0; count < 50; count ++)
668   {
669     int size = 0;
670     UNormalizationCheckResult testresult = UNORM_YES;
671     UChar data[20];
672     UChar norm[100];
673     UChar nfd[100];
674     int normsize = 0;
675     int nfdsize = 0;
676 
677     while (size != 19) {
678       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
679       log_verbose("0x%x", data[size]);
680       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
681                                   norm + normsize, 100 - normsize, &status);
682       if (U_FAILURE(status)) {
683         log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data generation - (Are you missing data?)\n");
684         break;
685       }
686       size ++;
687     }
688     log_verbose("\n");
689 
690     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
691                               nfd, 100, &status);
692     if (U_FAILURE(status)) {
693       log_data_err("unorm_quickCheck(FCD) failed: exception occurred at normalized data generation - (Are you missing data?)\n");
694     }
695 
696     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
697       testresult = UNORM_NO;
698     }
699     if (testresult == UNORM_YES) {
700       log_verbose("result UNORM_YES\n");
701     }
702     else {
703       log_verbose("result UNORM_NO\n");
704     }
705 
706     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
707       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
708     }
709   }
710 }
711 
712 static void
TestAPI()713 TestAPI() {
714     static const UChar in[]={ 0x68, 0xe4 };
715     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
716     UErrorCode errorCode;
717     int32_t length;
718 
719     /* try preflighting */
720     errorCode=U_ZERO_ERROR;
721     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
722     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
723         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
724         return;
725     }
726 
727     errorCode=U_ZERO_ERROR;
728     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
729     if(U_FAILURE(errorCode)) {
730         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
731         return;
732     }
733     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
734         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
735         return;
736     }
737     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
738     if(U_FAILURE(errorCode)) {
739         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
740         return;
741     }
742     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
743     if(U_FAILURE(errorCode)) {
744         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
745         return;
746     }
747 }
748 
/* code points used by the test cases that improve test code coverage */
enum {
    /* conjoining Jamo */
    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    /* compatibility Jamo */
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    /* precomposed Hangul */
    HANGUL_AC00=0xac00,                     /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=HANGUL_AC00+14*28+3,    /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
767 
768 static void
TestNormCoverage()769 TestNormCoverage() {
770     UChar input[1000], expect[1000], output[1000];
771     UErrorCode errorCode;
772     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
773 
774     /* create a long and nasty string with NFKC-unsafe characters */
775     inLength=0;
776 
777     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
778     input[inLength++]=HANGUL_KIYEOK;
779     input[inLength++]=HANGUL_WEO;
780     input[inLength++]=HANGUL_KIYEOK_SIOS;
781 
782     input[inLength++]=HANGUL_KIYEOK;
783     input[inLength++]=HANGUL_WEO;
784     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
785 
786     input[inLength++]=HANGUL_KIYEOK;
787     input[inLength++]=HANGUL_K_WEO;
788     input[inLength++]=HANGUL_KIYEOK_SIOS;
789 
790     input[inLength++]=HANGUL_KIYEOK;
791     input[inLength++]=HANGUL_K_WEO;
792     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
793 
794     input[inLength++]=HANGUL_K_KIYEOK;
795     input[inLength++]=HANGUL_WEO;
796     input[inLength++]=HANGUL_KIYEOK_SIOS;
797 
798     input[inLength++]=HANGUL_K_KIYEOK;
799     input[inLength++]=HANGUL_WEO;
800     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
801 
802     input[inLength++]=HANGUL_K_KIYEOK;
803     input[inLength++]=HANGUL_K_WEO;
804     input[inLength++]=HANGUL_KIYEOK_SIOS;
805 
806     input[inLength++]=HANGUL_K_KIYEOK;
807     input[inLength++]=HANGUL_K_WEO;
808     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
809 
810     /* Hangul LV with normal/compatibility Jamo T */
811     input[inLength++]=HANGUL_AC00;
812     input[inLength++]=HANGUL_KIYEOK_SIOS;
813 
814     input[inLength++]=HANGUL_AC00;
815     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
816 
817     /* compatibility Jamo L, V */
818     input[inLength++]=HANGUL_K_KIYEOK;
819     input[inLength++]=HANGUL_K_WEO;
820 
821     hangulPrefixLength=inLength;
822 
823     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
824     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
825     for(i=0; i<200; ++i) {
826         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
827         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
828         input[inLength++]=U16_LEAD(MUSICAL_STEM);
829         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
830     }
831 
832     /* (compatibility) Jamo L, T do not compose */
833     input[inLength++]=HANGUL_K_KIYEOK;
834     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
835 
836     /* quick checks */
837     errorCode=U_ZERO_ERROR;
838     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
839         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840     }
841     errorCode=U_ZERO_ERROR;
842     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
843         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844     }
845     errorCode=U_ZERO_ERROR;
846     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
847         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
848     }
849     errorCode=U_ZERO_ERROR;
850     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
851         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
852     }
853     errorCode=U_ZERO_ERROR;
854     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
855         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
856     }
857 
858     /* NFKC */
859     expectLength=0;
860     expect[expectLength++]=HANGUL_SYLLABLE;
861 
862     expect[expectLength++]=HANGUL_SYLLABLE;
863 
864     expect[expectLength++]=HANGUL_SYLLABLE;
865 
866     expect[expectLength++]=HANGUL_SYLLABLE;
867 
868     expect[expectLength++]=HANGUL_SYLLABLE;
869 
870     expect[expectLength++]=HANGUL_SYLLABLE;
871 
872     expect[expectLength++]=HANGUL_SYLLABLE;
873 
874     expect[expectLength++]=HANGUL_SYLLABLE;
875 
876     expect[expectLength++]=HANGUL_AC00+3;
877 
878     expect[expectLength++]=HANGUL_AC00+3;
879 
880     expect[expectLength++]=HANGUL_AC00+14*28;
881 
882     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
883     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
884     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
885     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
886     for(i=0; i<200; ++i) {
887         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
888         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
889     }
890     for(i=0; i<200; ++i) {
891         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
892         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
893     }
894 
895     expect[expectLength++]=HANGUL_KIYEOK;
896     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
897 
898     /* try destination overflow first */
899     errorCode=U_ZERO_ERROR;
900     preflightLength=unorm_normalize(input, inLength,
901                            UNORM_NFKC, 0,
902                            output, 100, /* too short */
903                            &errorCode);
904     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
905         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
906     }
907 
908     /* real NFKC */
909     errorCode=U_ZERO_ERROR;
910     length=unorm_normalize(input, inLength,
911                            UNORM_NFKC, 0,
912                            output, UPRV_LENGTHOF(output),
913                            &errorCode);
914     if(U_FAILURE(errorCode)) {
915         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
916     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
917         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
918         for(i=0; i<length; ++i) {
919             if(output[i]!=expect[i]) {
920                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
921                 break;
922             }
923         }
924     }
925     if(length!=preflightLength) {
926         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
927     }
928 
929     /* FCD */
930     u_memcpy(expect, input, hangulPrefixLength);
931     expectLength=hangulPrefixLength;
932 
933     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
934     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
935     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
936     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
937     for(i=0; i<200; ++i) {
938         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
939         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
940     }
941     for(i=0; i<200; ++i) {
942         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
943         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
944     }
945 
946     expect[expectLength++]=HANGUL_K_KIYEOK;
947     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
948 
949     errorCode=U_ZERO_ERROR;
950     length=unorm_normalize(input, inLength,
951                            UNORM_FCD, 0,
952                            output, UPRV_LENGTHOF(output),
953                            &errorCode);
954     if(U_FAILURE(errorCode)) {
955         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
956     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
957         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
958         for(i=0; i<length; ++i) {
959             if(output[i]!=expect[i]) {
960                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
961                 break;
962             }
963         }
964     }
965 }
966 
967 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
968 static void
TestConcatenate(void)969 TestConcatenate(void) {
970     /* "re + 'sume'" */
971     static const UChar
972     left[]={
973         0x72, 0x65, 0
974     },
975     right[]={
976         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
977     },
978     expect[]={
979         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
980     };
981 
982     UChar buffer[100];
983     UErrorCode errorCode;
984     int32_t length;
985 
986     /* left with length, right NUL-terminated */
987     errorCode=U_ZERO_ERROR;
988     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
989     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
990         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
991     }
992 
993     /* preflighting */
994     errorCode=U_ZERO_ERROR;
995     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
996     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
997         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
998     }
999 
1000     buffer[2]=0x5555;
1001     errorCode=U_ZERO_ERROR;
1002     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1003     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1004         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1005     }
1006 
1007     /* enter with U_FAILURE */
1008     buffer[2]=0xaaaa;
1009     errorCode=U_UNEXPECTED_TOKEN;
1010     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1011     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1012         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1013     }
1014 
1015     /* illegal arguments */
1016     buffer[2]=0xaaaa;
1017     errorCode=U_ZERO_ERROR;
1018     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1019     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1020         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1021     }
1022 
1023     errorCode=U_ZERO_ERROR;
1024     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1025     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1026         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1027     }
1028 }
1029 
/* separator between the expected output pieces in the iteration test strings */
enum { _PLUS=0x2b };
1033 
1034 static const char *const _modeString[UNORM_MODE_COUNT]={
1035     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1036 };
1037 
1038 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1039 _testIter(const UChar *src, int32_t srcLength,
1040           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1041           const UChar *out, int32_t outLength,
1042           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1043     UChar buffer[4];
1044     const UChar *expect, *outLimit, *in;
1045     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1046     UErrorCode errorCode;
1047     UBool neededToNormalize, expectNeeded;
1048 
1049     errorCode=U_ZERO_ERROR;
1050     outLimit=out+outLength;
1051     if(forward) {
1052         expect=out;
1053         i=index=0;
1054     } else {
1055         expect=outLimit;
1056         i=srcIndexesLength-2;
1057         index=srcLength;
1058     }
1059 
1060     for(;;) {
1061         prevIndex=index;
1062         if(forward) {
1063             if(!iter->hasNext(iter)) {
1064                 return;
1065             }
1066             length=unorm_next(iter,
1067                               buffer, UPRV_LENGTHOF(buffer),
1068                               mode, 0,
1069                               (UBool)(out!=NULL), &neededToNormalize,
1070                               &errorCode);
1071             expectIndex=srcIndexes[i+1];
1072             in=src+prevIndex;
1073             inLength=expectIndex-prevIndex;
1074 
1075             if(out!=NULL) {
1076                 /* get output piece from between plus signs */
1077                 expectLength=0;
1078                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1079                     ++expectLength;
1080                 }
1081                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1082             } else {
1083                 expect=in;
1084                 expectLength=inLength;
1085                 expectNeeded=FALSE;
1086             }
1087         } else {
1088             if(!iter->hasPrevious(iter)) {
1089                 return;
1090             }
1091             length=unorm_previous(iter,
1092                                   buffer, UPRV_LENGTHOF(buffer),
1093                                   mode, 0,
1094                                   (UBool)(out!=NULL), &neededToNormalize,
1095                                   &errorCode);
1096             expectIndex=srcIndexes[i];
1097             in=src+expectIndex;
1098             inLength=prevIndex-expectIndex;
1099 
1100             if(out!=NULL) {
1101                 /* get output piece from between plus signs */
1102                 expectLength=0;
1103                 while(expect!=out && expect[-1]!=_PLUS) {
1104                     ++expectLength;
1105                     --expect;
1106                 }
1107                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1108             } else {
1109                 expect=in;
1110                 expectLength=inLength;
1111                 expectNeeded=FALSE;
1112             }
1113         }
1114         index=iter->getIndex(iter, UITER_CURRENT);
1115 
1116         if(U_FAILURE(errorCode)) {
1117             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1118                     forward, _modeString[mode], i, u_errorName(errorCode));
1119             return;
1120         }
1121         if(expectIndex!=index) {
1122             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1123                     forward, _modeString[mode], i, index, expectIndex);
1124             return;
1125         }
1126         if(expectLength!=length) {
1127             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1128                     forward, _modeString[mode], i, length, expectLength);
1129             return;
1130         }
1131         if(0!=u_memcmp(expect, buffer, length)) {
1132             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1133                     forward, _modeString[mode], i);
1134             return;
1135         }
1136         if(neededToNormalize!=expectNeeded) {
1137         }
1138 
1139         if(forward) {
1140             expect+=expectLength+1; /* go after the + */
1141             ++i;
1142         } else {
1143             --expect; /* go before the + */
1144             --i;
1145         }
1146     }
1147 }
1148 
1149 static void
TestNextPrevious()1150 TestNextPrevious() {
1151     static const UChar
1152     src[]={ /* input string */
1153         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1154     },
1155     nfd[]={ /* + separates expected output pieces */
1156         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1157     },
1158     nfkd[]={
1159         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1160     },
1161     nfc[]={
1162         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1163     },
1164     nfkc[]={
1165         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1166     },
1167     fcd[]={
1168         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1169     };
1170 
1171     /* expected iterator indexes in the source string for each iteration piece */
1172     static const int32_t
1173     nfdIndexes[]={
1174         0, 1, 2, 5, 6, 7
1175     },
1176     nfkdIndexes[]={
1177         0, 1, 2, 5, 6, 7
1178     },
1179     nfcIndexes[]={
1180         0, 1, 2, 5, 6, 7
1181     },
1182     nfkcIndexes[]={
1183         0, 1, 2, 5, 7
1184     },
1185     fcdIndexes[]={
1186         0, 1, 2, 5, 6, 7
1187     };
1188 
1189     UCharIterator iter;
1190 
1191     UChar buffer[4];
1192     int32_t length;
1193 
1194     UBool neededToNormalize;
1195     UErrorCode errorCode;
1196 
1197     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1198 
1199     /* test iteration with doNormalize */
1200     iter.index=0;
1201     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1202     iter.index=0;
1203     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1204     iter.index=0;
1205     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1206     iter.index=0;
1207     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1208     iter.index=0;
1209     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1210 
1211     iter.index=iter.length;
1212     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1213     iter.index=iter.length;
1214     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1215     iter.index=iter.length;
1216     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1217     iter.index=iter.length;
1218     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1219     iter.index=iter.length;
1220     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1221 
1222     /* test iteration without doNormalize */
1223     iter.index=0;
1224     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1225     iter.index=0;
1226     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1227     iter.index=0;
1228     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1229     iter.index=0;
1230     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1231     iter.index=0;
1232     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1233 
1234     iter.index=iter.length;
1235     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1236     iter.index=iter.length;
1237     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1238     iter.index=iter.length;
1239     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1240     iter.index=iter.length;
1241     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1242     iter.index=iter.length;
1243     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1244 
1245     /* try without neededToNormalize */
1246     errorCode=U_ZERO_ERROR;
1247     buffer[0]=5;
1248     iter.index=1;
1249     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1250                       UNORM_NFD, 0, TRUE, NULL,
1251                       &errorCode);
1252     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1253         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1254         return;
1255     }
1256 
1257     /* preflight */
1258     neededToNormalize=9;
1259     iter.index=1;
1260     length=unorm_next(&iter, NULL, 0,
1261                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1262                       &errorCode);
1263     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1264         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1265         return;
1266     }
1267 
1268     errorCode=U_ZERO_ERROR;
1269     buffer[0]=buffer[1]=5;
1270     neededToNormalize=9;
1271     iter.index=1;
1272     length=unorm_next(&iter, buffer, 1,
1273                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1274                       &errorCode);
1275     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1276         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1277         return;
1278     }
1279 
1280     /* no iterator */
1281     errorCode=U_ZERO_ERROR;
1282     buffer[0]=buffer[1]=5;
1283     neededToNormalize=9;
1284     iter.index=1;
1285     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1286                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1287                       &errorCode);
1288     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1289         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1290         return;
1291     }
1292 
1293     /* illegal mode */
1294     buffer[0]=buffer[1]=5;
1295     neededToNormalize=9;
1296     iter.index=1;
1297     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1298                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1299                       &errorCode);
1300     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1301         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1302         return;
1303     }
1304 
1305     /* error coming in */
1306     errorCode=U_MISPLACED_QUANTIFIER;
1307     buffer[0]=5;
1308     iter.index=1;
1309     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1310                       UNORM_NFD, 0, TRUE, NULL,
1311                       &errorCode);
1312     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1313         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1314         return;
1315     }
1316 }
1317 
1318 static void
TestFCNFKCClosure(void)1319 TestFCNFKCClosure(void) {
1320     static const struct {
1321         UChar32 c;
1322         const UChar s[6];
1323     } tests[]={
1324         { 0x00C4, { 0 } },
1325         { 0x00E4, { 0 } },
1326         { 0x037A, { 0x0020, 0x03B9, 0 } },
1327         { 0x03D2, { 0x03C5, 0 } },
1328         { 0x20A8, { 0x0072, 0x0073, 0 } },
1329         { 0x210B, { 0x0068, 0 } },
1330         { 0x210C, { 0x0068, 0 } },
1331         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1332         { 0x2122, { 0x0074, 0x006D, 0 } },
1333         { 0x2128, { 0x007A, 0 } },
1334         { 0x1D5DB, { 0x0068, 0 } },
1335         { 0x1D5ED, { 0x007A, 0 } },
1336         { 0x0061, { 0 } }
1337     };
1338 
1339     UChar buffer[8];
1340     UErrorCode errorCode;
1341     int32_t i, length;
1342 
1343     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1344         errorCode=U_ZERO_ERROR;
1345         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1346         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1347             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1348         }
1349     }
1350 
1351     /* error handling */
1352     errorCode=U_ZERO_ERROR;
1353     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1354     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1355         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1356     }
1357 
1358     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1359     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1360         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1361     }
1362 }
1363 
1364 static void
TestQuickCheckPerCP()1365 TestQuickCheckPerCP() {
1366     UErrorCode errorCode;
1367     UChar32 c, lead, trail;
1368     UChar s[U16_MAX_LENGTH], nfd[16];
1369     int32_t length, lccc1, lccc2, tccc1, tccc2;
1370     int32_t qc1, qc2;
1371 
1372     if(
1373         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1374         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1375         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1376         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1377         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1378         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1379     ) {
1380         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1381     }
1382 
1383     /*
1384      * compare the quick check property values for some code points
1385      * to the quick check results for checking same-code point strings
1386      */
1387     errorCode=U_ZERO_ERROR;
1388     c=0;
1389     while(c<0x110000) {
1390         length=0;
1391         U16_APPEND_UNSAFE(s, length, c);
1392 
1393         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1394         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1395         if(qc1!=qc2) {
1396             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397         }
1398 
1399         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1400         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1401         if(qc1!=qc2) {
1402             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403         }
1404 
1405         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1406         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1407         if(qc1!=qc2) {
1408             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1409         }
1410 
1411         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1412         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1413         if(qc1!=qc2) {
1414             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1415         }
1416 
1417         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1418         if (U_FAILURE(errorCode)) {
1419             log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1420             break;
1421         }
1422 
1423         /* length-length == 0 is used to get around a compiler warning. */
1424         U16_GET(nfd, 0, length-length, length, lead);
1425         U16_GET(nfd, 0, length-1, length, trail);
1426 
1427         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1428         lccc2=u_getCombiningClass(lead);
1429         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1430         tccc2=u_getCombiningClass(trail);
1431 
1432         if(lccc1!=lccc2) {
1433             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1434                     lccc1, lccc2, c);
1435         }
1436         if(tccc1!=tccc2) {
1437             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1438                     tccc1, tccc2, c);
1439         }
1440 
1441         /* skip some code points */
1442         c=(20*c)/19+1;
1443     }
1444 }
1445 
1446 static void
TestComposition(void)1447 TestComposition(void) {
1448     static const struct {
1449         UNormalizationMode mode;
1450         uint32_t options;
1451         UChar input[12];
1452         UChar expect[12];
1453     } cases[]={
1454         /*
1455          * special cases for UAX #15 bug
1456          * see Unicode Corrigendum #5: Normalization Idempotency
1457          * at http://unicode.org/versions/corrigendum5.html
1458          * (was Public Review Issue #29)
1459          */
1460         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1461         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1462         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1463         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1464 
1465         /* TODO: add test cases for UNORM_FCC here (j2151) */
1466     };
1467 
1468     UChar output[16];
1469     UErrorCode errorCode;
1470     int32_t i, length;
1471 
1472     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1473         errorCode=U_ZERO_ERROR;
1474         length=unorm_normalize(
1475                     cases[i].input, -1,
1476                     cases[i].mode, cases[i].options,
1477                     output, UPRV_LENGTHOF(output),
1478                     &errorCode);
1479         if( U_FAILURE(errorCode) ||
1480             length!=u_strlen(cases[i].expect) ||
1481             0!=u_memcmp(output, cases[i].expect, length)
1482         ) {
1483             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1484         }
1485     }
1486 }
1487 
1488 static void
TestGetDecomposition()1489 TestGetDecomposition() {
1490     UChar decomp[32];
1491     int32_t length;
1492 
1493     UErrorCode errorCode=U_ZERO_ERROR;
1494     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1495     if(U_FAILURE(errorCode)) {
1496         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1497         return;
1498     }
1499 
1500     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1501     if(U_FAILURE(errorCode) || length>=0) {
1502         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1503     }
1504     errorCode=U_ZERO_ERROR;
1505     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1506     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1507         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1508     }
1509     errorCode=U_ZERO_ERROR;
1510     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1511     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1512         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1513     }
1514     errorCode=U_ZERO_ERROR;
1515     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1516     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1517         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1518     }
1519     errorCode=U_ZERO_ERROR;
1520     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1521     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1522         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1523     }
1524     errorCode=U_ZERO_ERROR;
1525     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1526     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1527         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1528     }
1529 }
1530 
1531 static void
TestGetRawDecomposition()1532 TestGetRawDecomposition() {
1533     UChar decomp[32];
1534     int32_t length;
1535 
1536     UErrorCode errorCode=U_ZERO_ERROR;
1537     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1538     if(U_FAILURE(errorCode)) {
1539         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1540         return;
1541     }
1542     /*
1543      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1544      * without recursive decomposition.
1545      */
1546 
1547     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1548     if(U_FAILURE(errorCode) || length>=0) {
1549         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1550     }
1551     errorCode=U_ZERO_ERROR;
1552     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1553     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1554         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1555     }
1556     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1557     errorCode=U_ZERO_ERROR;
1558     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1559     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1560         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1561     }
1562     /* U+212B ANGSTROM SIGN */
1563     errorCode=U_ZERO_ERROR;
1564     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1565     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1566         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1567     }
1568     errorCode=U_ZERO_ERROR;
1569     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1570     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1571         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1572     }
1573     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1574     errorCode=U_ZERO_ERROR;
1575     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1576     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1577         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1578     }
1579     errorCode=U_ZERO_ERROR;
1580     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1581     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1582         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1583     }
1584     errorCode=U_ZERO_ERROR;
1585     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1586     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1587         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1588     }
1589     errorCode=U_ZERO_ERROR;
1590     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1591     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1592         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1593     }
1594 }
1595 
1596 static void
TestAppendRestoreMiddle()1597 TestAppendRestoreMiddle() {
1598     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1599     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1600     /* NFC: C5 is 'A with ring above' */
1601     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1602     int32_t length;
1603     UErrorCode errorCode=U_ZERO_ERROR;
1604     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1605     if(U_FAILURE(errorCode)) {
1606         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1607         return;
1608     }
1609     /*
1610      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1611      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1612      * still fits into a[] but the full result still overflows this capacity.
1613      * (Let it modify the destination buffer before reallocating internally.)
1614      */
1615     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1616     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1617         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1618         return;
1619     }
1620     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1621     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1622         log_err("unorm2_append(overflow) modified the first string\n");
1623         return;
1624     }
1625     errorCode=U_ZERO_ERROR;
1626     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1627     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1628         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1629         return;
1630     }
1631 }
1632 
1633 static void
TestGetEasyToUseInstance()1634 TestGetEasyToUseInstance() {
1635     static const UChar in[]={
1636         0xA0,  /* -> <noBreak> 0020 */
1637         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1638     };
1639     UChar out[32];
1640     int32_t length;
1641 
1642     UErrorCode errorCode=U_ZERO_ERROR;
1643     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1644     if(U_FAILURE(errorCode)) {
1645         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1646         return;
1647     }
1648     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1649     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1650         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1651                 (int)length, u_errorName(errorCode));
1652     }
1653 
1654     errorCode=U_ZERO_ERROR;
1655     n2=unorm2_getNFDInstance(&errorCode);
1656     if(U_FAILURE(errorCode)) {
1657         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1658         return;
1659     }
1660     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1661     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1662         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1663                 (int)length, u_errorName(errorCode));
1664     }
1665 
1666     errorCode=U_ZERO_ERROR;
1667     n2=unorm2_getNFKCInstance(&errorCode);
1668     if(U_FAILURE(errorCode)) {
1669         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1670         return;
1671     }
1672     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1673     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1674         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1675                 (int)length, u_errorName(errorCode));
1676     }
1677 
1678     errorCode=U_ZERO_ERROR;
1679     n2=unorm2_getNFKDInstance(&errorCode);
1680     if(U_FAILURE(errorCode)) {
1681         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1682         return;
1683     }
1684     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1685     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1686         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1687                 (int)length, u_errorName(errorCode));
1688     }
1689 
1690     errorCode=U_ZERO_ERROR;
1691     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1692     if(U_FAILURE(errorCode)) {
1693         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1694         return;
1695     }
1696     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1697     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1698         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1699                 (int)length, u_errorName(errorCode));
1700     }
1701 }
1702 
1703 static void
TestAPICoverage()1704 TestAPICoverage() {
1705     UErrorCode errorCode = U_ZERO_ERROR;
1706     const UNormalizer2 *n2 = unorm2_getNFDInstance(&errorCode);
1707     if (U_FAILURE(errorCode)) {
1708         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1709         return;
1710     }
1711 
1712     if (!unorm2_hasBoundaryBefore(n2, u'C') || unorm2_hasBoundaryBefore(n2, 0x300)) {
1713         log_err("unorm2_hasBoundaryBefore() returns unexpected results\n");
1714     }
1715 
1716     if (!unorm2_hasBoundaryAfter(n2, u'C') || unorm2_hasBoundaryAfter(n2, 0x300)) {
1717         log_err("unorm2_hasBoundaryAfter() returns unexpected results\n");
1718     }
1719 
1720     if (!unorm2_isInert(n2, 0x50005) || unorm2_isInert(n2, 0x300)) {
1721         log_err("unorm2_isInert() returns unexpected results\n");
1722     }
1723 
1724     errorCode = U_ZERO_ERROR;
1725     if (!unorm2_isNormalized(n2, u"c\u0327\u0300", 3, &errorCode) ||
1726             unorm2_isNormalized(n2, u"c\u0300\u0327", 3, &errorCode) ||
1727             U_FAILURE(errorCode)) {
1728         log_err("unorm2_isNormalized() returns unexpected results\n");
1729     }
1730 
1731     errorCode = U_ZERO_ERROR;
1732     if (unorm2_quickCheck(n2, u"c\u0327\u0300", 3, &errorCode) == UNORM_NO ||
1733             unorm2_quickCheck(n2, u"c\u0300\u0327", 3, &errorCode) == UNORM_YES ||
1734             U_FAILURE(errorCode)) {
1735         log_err("unorm2_quickCheck() returns unexpected results\n");
1736     }
1737 
1738     errorCode = U_ZERO_ERROR;
1739     if (unorm2_spanQuickCheckYes(n2, u"c\u0327\u0300", 3, &errorCode) != 3 ||
1740             unorm2_spanQuickCheckYes(n2, u"c\u0300\u0327", 3, &errorCode) != 1 ||
1741             U_FAILURE(errorCode)) {
1742         log_err("unorm2_spanQuickCheckYes() returns unexpected results\n");
1743     }
1744 
1745     errorCode = U_ZERO_ERROR;
1746     UChar first[10] = { u'c', 0x300, 0, 0, 0, 0, 0, 0, 0, 0 };
1747     int32_t length = unorm2_normalizeSecondAndAppend(
1748         n2, first, 2, UPRV_LENGTHOF(first), u"\u0327d", 2, &errorCode);
1749     if (U_FAILURE(errorCode) || length != 4 || u_strcmp(first, u"c\u0327\u0300d") != 0) {
1750         log_err("unorm2_normalizeSecondAndAppend() returns unexpected results\n");
1751     }
1752 }
1753 
1754 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1755