1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #if !UCONFIG_NO_NORMALIZATION
26
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33
/*
 * Forward declarations of file-local (static) test functions.
 * They are referenced by addNormTest() below, before their definitions.
 */
static void
TestAPI(void);

static void
TestNormCoverage(void);

static void
TestConcatenate(void);

static void
TestNextPrevious(void);

static void TestIsNormalized(void);

static void
TestFCNFKCClosure(void);

static void
TestQuickCheckPerCP(void);

static void
TestComposition(void);

static void
TestFCD(void);

static void
TestGetDecomposition(void);

static void
TestGetRawDecomposition(void);

static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
static void TestAPICoverage(void);
69
/*
 * Canonical normalization test vectors.
 * Each row holds three escaped strings (the tests convert them with
 * CharsToUChars()): the input, its expected decomposition (checked by
 * TestDecomp() with UNORM_NFD) and its expected composition (checked by
 * TestCanonDecompCompose() with UNORM_NFC).
 * Columns 1 and 2 are also used by TestQuickCheckStringResult().
 */
static const char* const canonTests[][3] = {
    /* Input */ /* Decomposed */ /* Composed */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },

    { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above */

    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below (marks get reordered) */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below */

    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron */

    { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring */

    /* the ffi ligature U+FB03 is left alone by canonical normalization */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* the Roman numeral U+2163 is left alone by canonical normalization */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A grave grave_below (marks get reordered) */
    { "", "", "" } /* empty string */
};
106
/*
 * Compatibility normalization test vectors.
 * Each row holds three escaped strings: the input, its expected
 * decomposition (checked by TestCompatDecomp() with UNORM_NFKD) and its
 * expected composition (checked by TestCompatDecompCompose() with UNORM_NFKC).
 * Columns 1 and 2 are also used by TestQuickCheckStringResult().
 */
static const char* const compatTests[][3] = {
    /* Input */ /* Decomposed */ /* Composed */
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" }, /* Roman numeral U+2163 -> I + V */

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten */
    { "", "", "" } /* empty string */
};
129
/*
 * FCD test vectors: { input, expected UNORM_FCD output, unused }.
 * TestFCD() runs them through TestNormCases(), which reads column 1 for
 * UNORM_FCD; the third column is never read for this mode and is NULL.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */
    { "\\u010e", "\\u010e", NULL } /* D-caron */
};
135
136 void addNormTest(TestNode** root);
137
addNormTest(TestNode ** root)138 void addNormTest(TestNode** root)
139 {
140 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
141 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
142 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
143 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
144 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
145 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
146 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
147 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
148 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
149 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
150 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
151 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
152 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
153 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
154 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
155 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
156 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
157 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
158 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
159 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
160 addTest(root, &TestAPICoverage, "tsnorm/cnormtst/TestAPICoverage");
161 }
162
/*
 * Printable names for normalization modes, used in log messages.
 * TestNormCases() indexes this table directly with the UNormalizationMode
 * value, so the order must follow the enum; index 0 is a placeholder
 * (presumably because the enum values start at 1 - confirm in unorm.h).
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
173
/*
 * Normalizes every input string of a test table with unorm_normalize() in
 * the given mode and compares the output with the expected column.
 *
 * Each input is processed twice: once with an explicit source length and
 * once as a NUL-terminated string (srcLength -1). Both variants are
 * preflighted first, and the two preflighted lengths must agree.
 *
 * mode          - normalization mode to test; also indexes modeStrings[]
 * cases         - rows of three escaped strings: the input, the expected
 *                 result for non-composing modes, and the expected result
 *                 for UNORM_NFC/UNORM_NFKC
 * lengthOfCases - number of rows in cases
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    /* composing modes are checked against column 2, all others against column 1 */
    const int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar result[16];
    int32_t x;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(x=0; x < lengthOfCases; x++)
    {
        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
        UChar *source=CharsToUChars(cases[x][0]);
        int32_t neededLen, length2;

        /* preflight both variants; they must ask for the same capacity */
        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen!=length2) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }
        /* an overflow is the expected outcome of preflighting, not an error */
        if(status==U_BUFFER_OVERFLOW_ERROR)
        {
            status=U_ZERO_ERROR;
        }

        /* variant 1: explicit source length */
        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }

        /*
         * variant 2: NUL-terminated source.
         * Start from a clean error code: with a failure left over from
         * variant 1 the call would return immediately and this variant
         * would never really be exercised.
         */
        status=U_ZERO_ERROR;
        length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }
        free(source);
    }
}
213
TestDecomp()214 void TestDecomp() {
215 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
216 }
217
TestCompatDecomp()218 void TestCompatDecomp() {
219 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
220 }
221
TestCanonDecompCompose()222 void TestCanonDecompCompose() {
223 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
224 }
225
TestCompatDecompCompose()226 void TestCompatDecompCompose() {
227 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
228 }
229
TestFCD()230 void TestFCD() {
231 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
232 }
233
assertEqual(const UChar * result,const char * expected,int32_t index)234 static void assertEqual(const UChar* result, const char* expected, int32_t index)
235 {
236 UChar *expectedUni = CharsToUChars(expected);
237 if(u_strcmp(result, expectedUni)!=0){
238 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
239 austrdup(result) );
240 }
241 free(expectedUni);
242 }
243
/*
 * Normalizes a string that contains an embedded U+0000 and compares the
 * output, code unit by code unit, with the expected result.
 *
 * src, srcLen - input string and its length in UChars
 * exp, expLen - expected output and its length in UChars
 * mode        - normalization mode passed to unorm_normalize()
 * name        - printable mode name for log messages
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                           UChar *exp, int32_t expLen,
                           UNormalizationMode mode,
                           const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len, compareLen, i;
    UChar result[50];

    /* pre-fill the output so that untouched code units are recognizable */
    for(i=0;i<UPRV_LENGTHOF(result);i++)
    {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        /*
         * Nothing usable was produced; on a buffer overflow len may even
         * exceed the capacity of result[], so do not inspect the output.
         */
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        return;
    }
    if (len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, (int)expLen, (int)len);
    }

    /* compare only what both buffers actually contain, never past exp[] */
    compareLen = len < expLen ? len : expLen;
    for(i=0;i<compareLen;i++){
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    (int)i,
                    (unsigned int)exp[i],
                    (unsigned int)result[i]);
            return;
        }
        log_verbose(" %d: \\u%04X\n", (int)i, (unsigned int)result[i]);
    }

    /* only claim success when the length matched as well */
    if (len == expLen) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
286
TestNull()287 void TestNull()
288 {
289
290 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
291 int32_t source_comp_len = 4;
292 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
293 int32_t expect_comp_len = 3;
294
295 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
296 int32_t source_dcmp_len = 3;
297 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
298 int32_t expect_dcmp_len = 5;
299
300 TestNull_check(source_comp,
301 source_comp_len,
302 expect_comp,
303 expect_comp_len,
304 UNORM_NFC,
305 "UNORM_NFC");
306
307 TestNull_check(source_dcmp,
308 source_dcmp_len,
309 expect_dcmp,
310 expect_dcmp_len,
311 UNORM_NFD,
312 "UNORM_NFD");
313
314 TestNull_check(source_comp,
315 source_comp_len,
316 expect_comp,
317 expect_comp_len,
318 UNORM_NFKC,
319 "UNORM_NFKC");
320
321
322 }
323
TestQuickCheckResultNO()324 static void TestQuickCheckResultNO()
325 {
326 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
327 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
328 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
329 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
330 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
331 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
333 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
334
335
336 const int SIZE = 10;
337
338 int count = 0;
339 UErrorCode error = U_ZERO_ERROR;
340
341 for (; count < SIZE; count ++)
342 {
343 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
344 UNORM_NO)
345 {
346 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
347 return;
348 }
349 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
350 UNORM_NO)
351 {
352 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
353 return;
354 }
355 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
356 UNORM_NO)
357 {
358 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
359 return;
360 }
361 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
362 UNORM_NO)
363 {
364 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
365 return;
366 }
367 }
368 }
369
370
TestQuickCheckResultYES()371 static void TestQuickCheckResultYES()
372 {
373 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
374 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
375 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
376 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
377 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
378 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
379 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
380 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
381
382 const int SIZE = 10;
383 int count = 0;
384 UErrorCode error = U_ZERO_ERROR;
385
386 UChar cp = 0;
387 while (cp < 0xA0)
388 {
389 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
390 {
391 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
392 return;
393 }
394 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
395 UNORM_YES)
396 {
397 log_err("ERROR in NFC quick check at U+%04x\n", cp);
398 return;
399 }
400 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
401 {
402 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
403 return;
404 }
405 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
406 UNORM_YES)
407 {
408 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
409 return;
410 }
411 cp ++;
412 }
413
414 for (; count < SIZE; count ++)
415 {
416 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
417 UNORM_YES)
418 {
419 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
420 return;
421 }
422 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
423 != UNORM_YES)
424 {
425 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
426 return;
427 }
428 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
429 UNORM_YES)
430 {
431 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
432 return;
433 }
434 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
435 UNORM_YES)
436 {
437 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
438 return;
439 }
440 }
441 }
442
TestQuickCheckResultMAYBE()443 static void TestQuickCheckResultMAYBE()
444 {
445 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
446 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
447 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
448 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
449
450
451 const int SIZE = 10;
452
453 int count = 0;
454 UErrorCode error = U_ZERO_ERROR;
455
456 /* NFD and NFKD does not have any MAYBE codepoints */
457 for (; count < SIZE; count ++)
458 {
459 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
460 UNORM_MAYBE)
461 {
462 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
463 return;
464 }
465 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
466 UNORM_MAYBE)
467 {
468 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
469 return;
470 }
471 }
472 }
473
TestQuickCheckStringResult()474 static void TestQuickCheckStringResult()
475 {
476 int count;
477 UChar *d = NULL;
478 UChar *c = NULL;
479 UErrorCode error = U_ZERO_ERROR;
480
481 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
482 {
483 d = CharsToUChars(canonTests[count][1]);
484 c = CharsToUChars(canonTests[count][2]);
485 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
486 UNORM_YES)
487 {
488 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
489 free(d); free(c);
490 return;
491 }
492
493 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
494 UNORM_NO)
495 {
496 log_err("ERROR in NFC quick check for string at count %d\n", count);
497 free(d); free(c);
498 return;
499 }
500
501 free(d);
502 free(c);
503 }
504
505 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
506 {
507 d = CharsToUChars(compatTests[count][1]);
508 c = CharsToUChars(compatTests[count][2]);
509 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
510 UNORM_YES)
511 {
512 log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
513 free(d); free(c);
514 return;
515 }
516
517 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
518 UNORM_YES)
519 {
520 log_err("ERROR in NFKC quick check for string at count %d\n", count);
521 free(d); free(c);
522 return;
523 }
524
525 free(d);
526 free(c);
527 }
528 }
529
/*
 * Entry point for the unorm_quickCheck() tests: runs the per-code-unit
 * checks for the NO, YES and MAYBE answers, then the whole-string checks.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
537
538 /*
539 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
540 * normalized, and some that are not.
541 * Here we pick some specific cases and test the C API.
542 */
TestIsNormalized(void)543 static void TestIsNormalized(void) {
544 static const UChar notNFC[][8]={ /* strings that are not in NFC */
545 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
546 { 0xfb1d, 0 }, /* excluded from composition */
547 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
548 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
549 };
550 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
551 { 0x1100, 0x1161, 0 }, /* Jamo compose */
552 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
553 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
554 };
555
556 int32_t i;
557 UErrorCode errorCode;
558
559 /* API test */
560
561 /* normal case with length>=0 (length -1 used for special cases below) */
562 errorCode=U_ZERO_ERROR;
563 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
564 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
565 }
566
567 /* incoming U_FAILURE */
568 errorCode=U_TRUNCATED_CHAR_FOUND;
569 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
570 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
571 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
572 }
573
574 /* NULL source */
575 errorCode=U_ZERO_ERROR;
576 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
578 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
579 }
580
581 /* bad length */
582 errorCode=U_ZERO_ERROR;
583 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
584 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
585 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
586 }
587
588 /* specific cases */
589 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
590 errorCode=U_ZERO_ERROR;
591 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
592 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
593 }
594 errorCode=U_ZERO_ERROR;
595 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
596 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
597 }
598 }
599 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
600 errorCode=U_ZERO_ERROR;
601 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
602 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
603 }
604 }
605 }
606
TestCheckFCD()607 void TestCheckFCD()
608 {
609 UErrorCode status = U_ZERO_ERROR;
610 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
611 0x0A};
612 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
613 0x02B9, 0x0314, 0x0315, 0x0316};
614 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
615 0x0050, 0x0730, 0x09EE, 0x1E10};
616
617 static const UChar datastr[][5] =
618 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
619 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
620 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
621 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
622 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
623
624 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
625 0x6a,
626 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
627 0xea,
628 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
629 0x0307, 0x0308, 0x0309, 0x030a,
630 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
631 0x0327, 0x0328, 0x0329, 0x032a,
632 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
633 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
634
635 int count = 0;
636
637 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
638 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
639 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
640 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
641 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
642 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
643
644 if (U_FAILURE(status))
645 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
646
647 while (count < 4)
648 {
649 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
650 if (U_FAILURE(status)) {
651 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data set %d - (Are you missing data?)\n", count);
652 break;
653 }
654 else {
655 if (result[count] != fcdresult) {
656 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
657 result[count]);
658 }
659 }
660 count ++;
661 }
662
663 /* random checks of long strings */
664 status = U_ZERO_ERROR;
665 srand((unsigned)time( NULL ));
666
667 for (count = 0; count < 50; count ++)
668 {
669 int size = 0;
670 UNormalizationCheckResult testresult = UNORM_YES;
671 UChar data[20];
672 UChar norm[100];
673 UChar nfd[100];
674 int normsize = 0;
675 int nfdsize = 0;
676
677 while (size != 19) {
678 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
679 log_verbose("0x%x", data[size]);
680 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
681 norm + normsize, 100 - normsize, &status);
682 if (U_FAILURE(status)) {
683 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data generation - (Are you missing data?)\n");
684 break;
685 }
686 size ++;
687 }
688 log_verbose("\n");
689
690 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
691 nfd, 100, &status);
692 if (U_FAILURE(status)) {
693 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at normalized data generation - (Are you missing data?)\n");
694 }
695
696 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
697 testresult = UNORM_NO;
698 }
699 if (testresult == UNORM_YES) {
700 log_verbose("result UNORM_YES\n");
701 }
702 else {
703 log_verbose("result UNORM_NO\n");
704 }
705
706 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
707 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
708 }
709 }
710 }
711
712 static void
TestAPI()713 TestAPI() {
714 static const UChar in[]={ 0x68, 0xe4 };
715 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
716 UErrorCode errorCode;
717 int32_t length;
718
719 /* try preflighting */
720 errorCode=U_ZERO_ERROR;
721 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
722 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
723 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
724 return;
725 }
726
727 errorCode=U_ZERO_ERROR;
728 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
729 if(U_FAILURE(errorCode)) {
730 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
731 return;
732 }
733 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
734 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
735 return;
736 }
737 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
738 if(U_FAILURE(errorCode)) {
739 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
740 return;
741 }
742 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
743 if(U_FAILURE(errorCode)) {
744 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
745 return;
746 }
747 }
748
/*
 * Code points used by TestNormCoverage() to improve test code coverage.
 * The HANGUL_K_* values are compatibility Jamo whose NFKD forms are the
 * conjoining Jamo listed right after them; the MUSICAL_* values are
 * supplementary code points (above U+FFFF).
 */
enum {
    HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */

    HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */
    HANGUL_WEO=0x116f, /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */

    HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165, /* cc=216 */
    MUSICAL_STACCATO=0x1d17c /* cc=220 */
};
767
768 static void
TestNormCoverage()769 TestNormCoverage() {
770 UChar input[1000], expect[1000], output[1000];
771 UErrorCode errorCode;
772 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
773
774 /* create a long and nasty string with NFKC-unsafe characters */
775 inLength=0;
776
777 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
778 input[inLength++]=HANGUL_KIYEOK;
779 input[inLength++]=HANGUL_WEO;
780 input[inLength++]=HANGUL_KIYEOK_SIOS;
781
782 input[inLength++]=HANGUL_KIYEOK;
783 input[inLength++]=HANGUL_WEO;
784 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
785
786 input[inLength++]=HANGUL_KIYEOK;
787 input[inLength++]=HANGUL_K_WEO;
788 input[inLength++]=HANGUL_KIYEOK_SIOS;
789
790 input[inLength++]=HANGUL_KIYEOK;
791 input[inLength++]=HANGUL_K_WEO;
792 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
793
794 input[inLength++]=HANGUL_K_KIYEOK;
795 input[inLength++]=HANGUL_WEO;
796 input[inLength++]=HANGUL_KIYEOK_SIOS;
797
798 input[inLength++]=HANGUL_K_KIYEOK;
799 input[inLength++]=HANGUL_WEO;
800 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
801
802 input[inLength++]=HANGUL_K_KIYEOK;
803 input[inLength++]=HANGUL_K_WEO;
804 input[inLength++]=HANGUL_KIYEOK_SIOS;
805
806 input[inLength++]=HANGUL_K_KIYEOK;
807 input[inLength++]=HANGUL_K_WEO;
808 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
809
810 /* Hangul LV with normal/compatibility Jamo T */
811 input[inLength++]=HANGUL_AC00;
812 input[inLength++]=HANGUL_KIYEOK_SIOS;
813
814 input[inLength++]=HANGUL_AC00;
815 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
816
817 /* compatibility Jamo L, V */
818 input[inLength++]=HANGUL_K_KIYEOK;
819 input[inLength++]=HANGUL_K_WEO;
820
821 hangulPrefixLength=inLength;
822
823 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
824 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
825 for(i=0; i<200; ++i) {
826 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
827 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
828 input[inLength++]=U16_LEAD(MUSICAL_STEM);
829 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
830 }
831
832 /* (compatibility) Jamo L, T do not compose */
833 input[inLength++]=HANGUL_K_KIYEOK;
834 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
835
836 /* quick checks */
837 errorCode=U_ZERO_ERROR;
838 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
839 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840 }
841 errorCode=U_ZERO_ERROR;
842 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
843 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844 }
845 errorCode=U_ZERO_ERROR;
846 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
847 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
848 }
849 errorCode=U_ZERO_ERROR;
850 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
851 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
852 }
853 errorCode=U_ZERO_ERROR;
854 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
855 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
856 }
857
858 /* NFKC */
859 expectLength=0;
860 expect[expectLength++]=HANGUL_SYLLABLE;
861
862 expect[expectLength++]=HANGUL_SYLLABLE;
863
864 expect[expectLength++]=HANGUL_SYLLABLE;
865
866 expect[expectLength++]=HANGUL_SYLLABLE;
867
868 expect[expectLength++]=HANGUL_SYLLABLE;
869
870 expect[expectLength++]=HANGUL_SYLLABLE;
871
872 expect[expectLength++]=HANGUL_SYLLABLE;
873
874 expect[expectLength++]=HANGUL_SYLLABLE;
875
876 expect[expectLength++]=HANGUL_AC00+3;
877
878 expect[expectLength++]=HANGUL_AC00+3;
879
880 expect[expectLength++]=HANGUL_AC00+14*28;
881
882 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
883 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
884 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
885 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
886 for(i=0; i<200; ++i) {
887 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
888 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
889 }
890 for(i=0; i<200; ++i) {
891 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
892 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
893 }
894
895 expect[expectLength++]=HANGUL_KIYEOK;
896 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
897
898 /* try destination overflow first */
899 errorCode=U_ZERO_ERROR;
900 preflightLength=unorm_normalize(input, inLength,
901 UNORM_NFKC, 0,
902 output, 100, /* too short */
903 &errorCode);
904 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
905 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
906 }
907
908 /* real NFKC */
909 errorCode=U_ZERO_ERROR;
910 length=unorm_normalize(input, inLength,
911 UNORM_NFKC, 0,
912 output, UPRV_LENGTHOF(output),
913 &errorCode);
914 if(U_FAILURE(errorCode)) {
915 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
916 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
917 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
918 for(i=0; i<length; ++i) {
919 if(output[i]!=expect[i]) {
920 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
921 break;
922 }
923 }
924 }
925 if(length!=preflightLength) {
926 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
927 }
928
929 /* FCD */
930 u_memcpy(expect, input, hangulPrefixLength);
931 expectLength=hangulPrefixLength;
932
933 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
934 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
935 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
936 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
937 for(i=0; i<200; ++i) {
938 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
939 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
940 }
941 for(i=0; i<200; ++i) {
942 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
943 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
944 }
945
946 expect[expectLength++]=HANGUL_K_KIYEOK;
947 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
948
949 errorCode=U_ZERO_ERROR;
950 length=unorm_normalize(input, inLength,
951 UNORM_FCD, 0,
952 output, UPRV_LENGTHOF(output),
953 &errorCode);
954 if(U_FAILURE(errorCode)) {
955 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
956 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
957 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
958 for(i=0; i<length; ++i) {
959 if(output[i]!=expect[i]) {
960 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
961 break;
962 }
963 }
964 }
965 }
966
967 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
968 static void
TestConcatenate(void)969 TestConcatenate(void) {
970 /* "re + 'sume'" */
971 static const UChar
972 left[]={
973 0x72, 0x65, 0
974 },
975 right[]={
976 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
977 },
978 expect[]={
979 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
980 };
981
982 UChar buffer[100];
983 UErrorCode errorCode;
984 int32_t length;
985
986 /* left with length, right NUL-terminated */
987 errorCode=U_ZERO_ERROR;
988 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
989 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
990 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
991 }
992
993 /* preflighting */
994 errorCode=U_ZERO_ERROR;
995 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
996 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
997 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
998 }
999
1000 buffer[2]=0x5555;
1001 errorCode=U_ZERO_ERROR;
1002 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1003 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1004 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1005 }
1006
1007 /* enter with U_FAILURE */
1008 buffer[2]=0xaaaa;
1009 errorCode=U_UNEXPECTED_TOKEN;
1010 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1011 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1012 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1013 }
1014
1015 /* illegal arguments */
1016 buffer[2]=0xaaaa;
1017 errorCode=U_ZERO_ERROR;
1018 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1019 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1020 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1021 }
1022
1023 errorCode=U_ZERO_ERROR;
1024 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1025 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1026 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1027 }
1028 }
1029
/*
 * Separator code unit (0x2b, ASCII '+') used inside the expected-output arrays of
 * the iteration tests: _testIter() reads one expected piece "from between plus signs".
 */
enum {
    _PLUS=0x2b
};
1033
/*
 * Printable names for log messages, indexed with a UNormalizationMode value
 * (_testIter() uses _modeString[mode]).
 * NOTE(review): the order presumably mirrors the numeric values of the
 * UNormalizationMode constants in unicode/unorm.h, with "0" as a placeholder
 * for index 0 - confirm against the header.
 */
static const char *const _modeString[UNORM_MODE_COUNT]={
    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
};
1037
/*
 * Walks iter over src with unorm_next() (forward) or unorm_previous() (backward)
 * until the iterator reports no more text, and checks every step.
 *
 * src, srcLength     the text that iter was set up on
 * iter               iterator, already positioned by the caller (start or end)
 * mode               normalization mode handed to unorm_next()/unorm_previous()
 * forward            TRUE: use unorm_next(); FALSE: use unorm_previous()
 * out, outLength     expected output pieces separated by _PLUS; if out is NULL the
 *                    calls are made with doNormalize=FALSE and each step is
 *                    expected to return the corresponding slice of src unchanged
 * srcIndexes, srcIndexesLength
 *                    expected iterator index at each piece boundary in src
 *
 * For each step the function compares the iterator index, the returned length and
 * the returned code units against the expectation. It logs and returns at the
 * first mismatch or failure code.
 */
static void
_testIter(const UChar *src, int32_t srcLength,
          UCharIterator *iter, UNormalizationMode mode, UBool forward,
          const UChar *out, int32_t outLength,
          const int32_t *srcIndexes, int32_t srcIndexesLength) {
    UChar buffer[4];
    const UChar *expect, *outLimit, *in;
    int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
    UErrorCode errorCode;
    UBool neededToNormalize, expectNeeded;

    errorCode=U_ZERO_ERROR;
    outLimit=out+outLength;
    if(forward) {
        /* start with the first expected piece and the first source index */
        expect=out;
        i=index=0;
    } else {
        /* start behind the last expected piece; i addresses the start index of the last piece */
        expect=outLimit;
        i=srcIndexesLength-2;
        index=srcLength;
    }

    for(;;) {
        prevIndex=index;
        if(forward) {
            if(!iter->hasNext(iter)) {
                return;
            }
            length=unorm_next(iter,
                              buffer, UPRV_LENGTHOF(buffer),
                              mode, 0,
                              (UBool)(out!=NULL), &neededToNormalize,
                              &errorCode);
            /* the source piece for this step is src[prevIndex..expectIndex[ */
            expectIndex=srcIndexes[i+1];
            in=src+prevIndex;
            inLength=expectIndex-prevIndex;

            if(out!=NULL) {
                /* get output piece from between plus signs */
                expectLength=0;
                while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
                    ++expectLength;
                }
                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
            } else {
                /* not normalizing: the output is expected to equal the source piece */
                expect=in;
                expectLength=inLength;
                expectNeeded=FALSE;
            }
        } else {
            if(!iter->hasPrevious(iter)) {
                return;
            }
            length=unorm_previous(iter,
                                  buffer, UPRV_LENGTHOF(buffer),
                                  mode, 0,
                                  (UBool)(out!=NULL), &neededToNormalize,
                                  &errorCode);
            /* the source piece for this step is src[expectIndex..prevIndex[ */
            expectIndex=srcIndexes[i];
            in=src+expectIndex;
            inLength=prevIndex-expectIndex;

            if(out!=NULL) {
                /* get output piece from between plus signs */
                expectLength=0;
                /* move expect backwards to the start of the piece while counting its length */
                while(expect!=out && expect[-1]!=_PLUS) {
                    ++expectLength;
                    --expect;
                }
                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
            } else {
                /* not normalizing: the output is expected to equal the source piece */
                expect=in;
                expectLength=inLength;
                expectNeeded=FALSE;
            }
        }
        index=iter->getIndex(iter, UITER_CURRENT);

        if(U_FAILURE(errorCode)) {
            log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
                         forward, _modeString[mode], i, u_errorName(errorCode));
            return;
        }
        if(expectIndex!=index) {
            log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
                    forward, _modeString[mode], i, index, expectIndex);
            return;
        }
        if(expectLength!=length) {
            log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
                    forward, _modeString[mode], i, length, expectLength);
            return;
        }
        if(0!=u_memcmp(expect, buffer, length)) {
            log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
                    forward, _modeString[mode], i);
            return;
        }
        if(neededToNormalize!=expectNeeded) {
            /*
             * NOTE(review): this branch is empty, so a neededToNormalize mismatch
             * is never reported - confirm whether that is intentional.
             */
        }

        if(forward) {
            expect+=expectLength+1; /* go after the + */
            ++i;
        } else {
            --expect; /* go before the + */
            --i;
        }
    }
}
1148
1149 static void
TestNextPrevious()1150 TestNextPrevious() {
1151 static const UChar
1152 src[]={ /* input string */
1153 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1154 },
1155 nfd[]={ /* + separates expected output pieces */
1156 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1157 },
1158 nfkd[]={
1159 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1160 },
1161 nfc[]={
1162 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1163 },
1164 nfkc[]={
1165 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1166 },
1167 fcd[]={
1168 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1169 };
1170
1171 /* expected iterator indexes in the source string for each iteration piece */
1172 static const int32_t
1173 nfdIndexes[]={
1174 0, 1, 2, 5, 6, 7
1175 },
1176 nfkdIndexes[]={
1177 0, 1, 2, 5, 6, 7
1178 },
1179 nfcIndexes[]={
1180 0, 1, 2, 5, 6, 7
1181 },
1182 nfkcIndexes[]={
1183 0, 1, 2, 5, 7
1184 },
1185 fcdIndexes[]={
1186 0, 1, 2, 5, 6, 7
1187 };
1188
1189 UCharIterator iter;
1190
1191 UChar buffer[4];
1192 int32_t length;
1193
1194 UBool neededToNormalize;
1195 UErrorCode errorCode;
1196
1197 uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1198
1199 /* test iteration with doNormalize */
1200 iter.index=0;
1201 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1202 iter.index=0;
1203 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1204 iter.index=0;
1205 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1206 iter.index=0;
1207 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1208 iter.index=0;
1209 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1210
1211 iter.index=iter.length;
1212 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1213 iter.index=iter.length;
1214 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1215 iter.index=iter.length;
1216 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1217 iter.index=iter.length;
1218 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1219 iter.index=iter.length;
1220 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1221
1222 /* test iteration without doNormalize */
1223 iter.index=0;
1224 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1225 iter.index=0;
1226 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1227 iter.index=0;
1228 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1229 iter.index=0;
1230 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1231 iter.index=0;
1232 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1233
1234 iter.index=iter.length;
1235 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1236 iter.index=iter.length;
1237 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1238 iter.index=iter.length;
1239 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1240 iter.index=iter.length;
1241 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1242 iter.index=iter.length;
1243 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1244
1245 /* try without neededToNormalize */
1246 errorCode=U_ZERO_ERROR;
1247 buffer[0]=5;
1248 iter.index=1;
1249 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1250 UNORM_NFD, 0, TRUE, NULL,
1251 &errorCode);
1252 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1253 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1254 return;
1255 }
1256
1257 /* preflight */
1258 neededToNormalize=9;
1259 iter.index=1;
1260 length=unorm_next(&iter, NULL, 0,
1261 UNORM_NFD, 0, TRUE, &neededToNormalize,
1262 &errorCode);
1263 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1264 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1265 return;
1266 }
1267
1268 errorCode=U_ZERO_ERROR;
1269 buffer[0]=buffer[1]=5;
1270 neededToNormalize=9;
1271 iter.index=1;
1272 length=unorm_next(&iter, buffer, 1,
1273 UNORM_NFD, 0, TRUE, &neededToNormalize,
1274 &errorCode);
1275 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1276 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1277 return;
1278 }
1279
1280 /* no iterator */
1281 errorCode=U_ZERO_ERROR;
1282 buffer[0]=buffer[1]=5;
1283 neededToNormalize=9;
1284 iter.index=1;
1285 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1286 UNORM_NFD, 0, TRUE, &neededToNormalize,
1287 &errorCode);
1288 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1289 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1290 return;
1291 }
1292
1293 /* illegal mode */
1294 buffer[0]=buffer[1]=5;
1295 neededToNormalize=9;
1296 iter.index=1;
1297 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1298 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1299 &errorCode);
1300 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1301 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1302 return;
1303 }
1304
1305 /* error coming in */
1306 errorCode=U_MISPLACED_QUANTIFIER;
1307 buffer[0]=5;
1308 iter.index=1;
1309 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1310 UNORM_NFD, 0, TRUE, NULL,
1311 &errorCode);
1312 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1313 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1314 return;
1315 }
1316 }
1317
1318 static void
TestFCNFKCClosure(void)1319 TestFCNFKCClosure(void) {
1320 static const struct {
1321 UChar32 c;
1322 const UChar s[6];
1323 } tests[]={
1324 { 0x00C4, { 0 } },
1325 { 0x00E4, { 0 } },
1326 { 0x037A, { 0x0020, 0x03B9, 0 } },
1327 { 0x03D2, { 0x03C5, 0 } },
1328 { 0x20A8, { 0x0072, 0x0073, 0 } },
1329 { 0x210B, { 0x0068, 0 } },
1330 { 0x210C, { 0x0068, 0 } },
1331 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1332 { 0x2122, { 0x0074, 0x006D, 0 } },
1333 { 0x2128, { 0x007A, 0 } },
1334 { 0x1D5DB, { 0x0068, 0 } },
1335 { 0x1D5ED, { 0x007A, 0 } },
1336 { 0x0061, { 0 } }
1337 };
1338
1339 UChar buffer[8];
1340 UErrorCode errorCode;
1341 int32_t i, length;
1342
1343 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1344 errorCode=U_ZERO_ERROR;
1345 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1346 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1347 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1348 }
1349 }
1350
1351 /* error handling */
1352 errorCode=U_ZERO_ERROR;
1353 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1354 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1355 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1356 }
1357
1358 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1359 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1360 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1361 }
1362 }
1363
1364 static void
TestQuickCheckPerCP()1365 TestQuickCheckPerCP() {
1366 UErrorCode errorCode;
1367 UChar32 c, lead, trail;
1368 UChar s[U16_MAX_LENGTH], nfd[16];
1369 int32_t length, lccc1, lccc2, tccc1, tccc2;
1370 int32_t qc1, qc2;
1371
1372 if(
1373 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1374 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1375 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1376 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1377 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1378 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1379 ) {
1380 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1381 }
1382
1383 /*
1384 * compare the quick check property values for some code points
1385 * to the quick check results for checking same-code point strings
1386 */
1387 errorCode=U_ZERO_ERROR;
1388 c=0;
1389 while(c<0x110000) {
1390 length=0;
1391 U16_APPEND_UNSAFE(s, length, c);
1392
1393 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1394 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1395 if(qc1!=qc2) {
1396 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397 }
1398
1399 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1400 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1401 if(qc1!=qc2) {
1402 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403 }
1404
1405 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1406 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1407 if(qc1!=qc2) {
1408 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1409 }
1410
1411 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1412 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1413 if(qc1!=qc2) {
1414 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1415 }
1416
1417 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1418 if (U_FAILURE(errorCode)) {
1419 log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1420 break;
1421 }
1422
1423 /* length-length == 0 is used to get around a compiler warning. */
1424 U16_GET(nfd, 0, length-length, length, lead);
1425 U16_GET(nfd, 0, length-1, length, trail);
1426
1427 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1428 lccc2=u_getCombiningClass(lead);
1429 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1430 tccc2=u_getCombiningClass(trail);
1431
1432 if(lccc1!=lccc2) {
1433 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1434 lccc1, lccc2, c);
1435 }
1436 if(tccc1!=tccc2) {
1437 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1438 tccc1, tccc2, c);
1439 }
1440
1441 /* skip some code points */
1442 c=(20*c)/19+1;
1443 }
1444 }
1445
1446 static void
TestComposition(void)1447 TestComposition(void) {
1448 static const struct {
1449 UNormalizationMode mode;
1450 uint32_t options;
1451 UChar input[12];
1452 UChar expect[12];
1453 } cases[]={
1454 /*
1455 * special cases for UAX #15 bug
1456 * see Unicode Corrigendum #5: Normalization Idempotency
1457 * at http://unicode.org/versions/corrigendum5.html
1458 * (was Public Review Issue #29)
1459 */
1460 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1461 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1462 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1463 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1464
1465 /* TODO: add test cases for UNORM_FCC here (j2151) */
1466 };
1467
1468 UChar output[16];
1469 UErrorCode errorCode;
1470 int32_t i, length;
1471
1472 for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1473 errorCode=U_ZERO_ERROR;
1474 length=unorm_normalize(
1475 cases[i].input, -1,
1476 cases[i].mode, cases[i].options,
1477 output, UPRV_LENGTHOF(output),
1478 &errorCode);
1479 if( U_FAILURE(errorCode) ||
1480 length!=u_strlen(cases[i].expect) ||
1481 0!=u_memcmp(output, cases[i].expect, length)
1482 ) {
1483 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1484 }
1485 }
1486 }
1487
1488 static void
TestGetDecomposition()1489 TestGetDecomposition() {
1490 UChar decomp[32];
1491 int32_t length;
1492
1493 UErrorCode errorCode=U_ZERO_ERROR;
1494 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1495 if(U_FAILURE(errorCode)) {
1496 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1497 return;
1498 }
1499
1500 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1501 if(U_FAILURE(errorCode) || length>=0) {
1502 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1503 }
1504 errorCode=U_ZERO_ERROR;
1505 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1506 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1507 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1508 }
1509 errorCode=U_ZERO_ERROR;
1510 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1511 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1512 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1513 }
1514 errorCode=U_ZERO_ERROR;
1515 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1516 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1517 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1518 }
1519 errorCode=U_ZERO_ERROR;
1520 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1521 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1522 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1523 }
1524 errorCode=U_ZERO_ERROR;
1525 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1526 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1527 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1528 }
1529 }
1530
1531 static void
TestGetRawDecomposition()1532 TestGetRawDecomposition() {
1533 UChar decomp[32];
1534 int32_t length;
1535
1536 UErrorCode errorCode=U_ZERO_ERROR;
1537 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1538 if(U_FAILURE(errorCode)) {
1539 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1540 return;
1541 }
1542 /*
1543 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1544 * without recursive decomposition.
1545 */
1546
1547 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1548 if(U_FAILURE(errorCode) || length>=0) {
1549 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1550 }
1551 errorCode=U_ZERO_ERROR;
1552 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1553 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1554 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1555 }
1556 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1557 errorCode=U_ZERO_ERROR;
1558 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1559 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1560 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1561 }
1562 /* U+212B ANGSTROM SIGN */
1563 errorCode=U_ZERO_ERROR;
1564 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1565 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1566 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1567 }
1568 errorCode=U_ZERO_ERROR;
1569 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1570 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1571 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1572 }
1573 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1574 errorCode=U_ZERO_ERROR;
1575 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1576 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1577 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1578 }
1579 errorCode=U_ZERO_ERROR;
1580 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1581 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1582 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1583 }
1584 errorCode=U_ZERO_ERROR;
1585 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1586 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1587 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1588 }
1589 errorCode=U_ZERO_ERROR;
1590 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1591 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1592 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1593 }
1594 }
1595
1596 static void
TestAppendRestoreMiddle()1597 TestAppendRestoreMiddle() {
1598 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1599 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1600 /* NFC: C5 is 'A with ring above' */
1601 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1602 int32_t length;
1603 UErrorCode errorCode=U_ZERO_ERROR;
1604 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1605 if(U_FAILURE(errorCode)) {
1606 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1607 return;
1608 }
1609 /*
1610 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1611 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1612 * still fits into a[] but the full result still overflows this capacity.
1613 * (Let it modify the destination buffer before reallocating internally.)
1614 */
1615 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1616 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1617 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1618 return;
1619 }
1620 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1621 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1622 log_err("unorm2_append(overflow) modified the first string\n");
1623 return;
1624 }
1625 errorCode=U_ZERO_ERROR;
1626 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1627 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1628 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1629 return;
1630 }
1631 }
1632
1633 static void
TestGetEasyToUseInstance()1634 TestGetEasyToUseInstance() {
1635 static const UChar in[]={
1636 0xA0, /* -> <noBreak> 0020 */
1637 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1638 };
1639 UChar out[32];
1640 int32_t length;
1641
1642 UErrorCode errorCode=U_ZERO_ERROR;
1643 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1644 if(U_FAILURE(errorCode)) {
1645 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1646 return;
1647 }
1648 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1649 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1650 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1651 (int)length, u_errorName(errorCode));
1652 }
1653
1654 errorCode=U_ZERO_ERROR;
1655 n2=unorm2_getNFDInstance(&errorCode);
1656 if(U_FAILURE(errorCode)) {
1657 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1658 return;
1659 }
1660 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1661 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1662 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1663 (int)length, u_errorName(errorCode));
1664 }
1665
1666 errorCode=U_ZERO_ERROR;
1667 n2=unorm2_getNFKCInstance(&errorCode);
1668 if(U_FAILURE(errorCode)) {
1669 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1670 return;
1671 }
1672 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1673 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1674 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1675 (int)length, u_errorName(errorCode));
1676 }
1677
1678 errorCode=U_ZERO_ERROR;
1679 n2=unorm2_getNFKDInstance(&errorCode);
1680 if(U_FAILURE(errorCode)) {
1681 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1682 return;
1683 }
1684 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1685 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1686 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1687 (int)length, u_errorName(errorCode));
1688 }
1689
1690 errorCode=U_ZERO_ERROR;
1691 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1692 if(U_FAILURE(errorCode)) {
1693 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1694 return;
1695 }
1696 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1697 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1698 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1699 (int)length, u_errorName(errorCode));
1700 }
1701 }
1702
1703 static void
TestAPICoverage()1704 TestAPICoverage() {
1705 UErrorCode errorCode = U_ZERO_ERROR;
1706 const UNormalizer2 *n2 = unorm2_getNFDInstance(&errorCode);
1707 if (U_FAILURE(errorCode)) {
1708 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1709 return;
1710 }
1711
1712 if (!unorm2_hasBoundaryBefore(n2, u'C') || unorm2_hasBoundaryBefore(n2, 0x300)) {
1713 log_err("unorm2_hasBoundaryBefore() returns unexpected results\n");
1714 }
1715
1716 if (!unorm2_hasBoundaryAfter(n2, u'C') || unorm2_hasBoundaryAfter(n2, 0x300)) {
1717 log_err("unorm2_hasBoundaryAfter() returns unexpected results\n");
1718 }
1719
1720 if (!unorm2_isInert(n2, 0x50005) || unorm2_isInert(n2, 0x300)) {
1721 log_err("unorm2_isInert() returns unexpected results\n");
1722 }
1723
1724 errorCode = U_ZERO_ERROR;
1725 if (!unorm2_isNormalized(n2, u"c\u0327\u0300", 3, &errorCode) ||
1726 unorm2_isNormalized(n2, u"c\u0300\u0327", 3, &errorCode) ||
1727 U_FAILURE(errorCode)) {
1728 log_err("unorm2_isNormalized() returns unexpected results\n");
1729 }
1730
1731 errorCode = U_ZERO_ERROR;
1732 if (unorm2_quickCheck(n2, u"c\u0327\u0300", 3, &errorCode) == UNORM_NO ||
1733 unorm2_quickCheck(n2, u"c\u0300\u0327", 3, &errorCode) == UNORM_YES ||
1734 U_FAILURE(errorCode)) {
1735 log_err("unorm2_quickCheck() returns unexpected results\n");
1736 }
1737
1738 errorCode = U_ZERO_ERROR;
1739 if (unorm2_spanQuickCheckYes(n2, u"c\u0327\u0300", 3, &errorCode) != 3 ||
1740 unorm2_spanQuickCheckYes(n2, u"c\u0300\u0327", 3, &errorCode) != 1 ||
1741 U_FAILURE(errorCode)) {
1742 log_err("unorm2_spanQuickCheckYes() returns unexpected results\n");
1743 }
1744
1745 errorCode = U_ZERO_ERROR;
1746 UChar first[10] = { u'c', 0x300, 0, 0, 0, 0, 0, 0, 0, 0 };
1747 int32_t length = unorm2_normalizeSecondAndAppend(
1748 n2, first, 2, UPRV_LENGTHOF(first), u"\u0327d", 2, &errorCode);
1749 if (U_FAILURE(errorCode) || length != 4 || u_strcmp(first, u"c\u0327\u0300d") != 0) {
1750 log_err("unorm2_normalizeSecondAndAppend() returns unexpected results\n");
1751 }
1752 }
1753
1754 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1755