1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #if !UCONFIG_NO_NORMALIZATION
26
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <time.h>
30 #include "unicode/uchar.h"
31 #include "unicode/ustring.h"
32 #include "unicode/unorm.h"
33 #include "cnormtst.h"
34
/*
 * Forward declarations of the file-local test functions.
 * Every function declared here is registered by addNormTest().
 */
static void
TestAPI(void);

static void
TestNormCoverage(void);

static void
TestConcatenate(void);

static void
TestNextPrevious(void);

static void TestIsNormalized(void);

static void
TestFCNFKCClosure(void);

static void
TestQuickCheckPerCP(void);

static void
TestComposition(void);

static void
TestFCD(void);

static void
TestGetDecomposition(void);

static void
TestGetRawDecomposition(void);

static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
static void TestAPICoverage(void);
70
/*
 * Canonical normalization test data.
 * Each row holds three escaped strings: the input, the expected canonical
 * decomposition (column 1) and the expected canonical composition (column 2).
 * TestNormCases() converts the strings with CharsToUChars() and compares
 * against column 2 for the composing modes and column 1 otherwise.
 */
static const char* const canonTests[][3] = {
    /* Input*/                    /*Decomposed*/                /*Composed*/
    { "cat",                    "cat",                      "cat"               },
    { "\\u00e0ardvark",            "a\\u0300ardvark",            "\\u00e0ardvark",    },

    { "\\u1e0a",                 "D\\u0307",                  "\\u1e0a"            }, /* D-dot_above*/
    { "D\\u0307",                "D\\u0307",                  "\\u1e0a"            }, /* D dot_above*/

    { "\\u1e0c\\u0307",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"      }, /* D-dot_below dot_above*/
    { "\\u1e0a\\u0323",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"      }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323",          "D\\u0323\\u0307",            "\\u1e0c\\u0307"      }, /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323",     "D\\u0327\\u0323\\u0307",      "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323",    "D\\u0328\\u0323\\u0307",      "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/

    { "\\u1E14",                 "E\\u0304\\u0300",            "\\u1E14"            }, /* E-macron-grave*/
    { "\\u0112\\u0300",           "E\\u0304\\u0300",            "\\u1E14"            }, /* E-macron + grave*/
    { "\\u00c8\\u0304",           "E\\u0300\\u0304",            "\\u00c8\\u0304"      }, /* E-grave + macron*/

    { "\\u212b",                 "A\\u030a",                  "\\u00c5"            }, /* angstrom_sign*/
    { "\\u00c5",                 "A\\u030a",                  "\\u00c5"            }, /* A-ring*/

    { "\\u00C4ffin",             "A\\u0308ffin",              "\\u00C4ffin"        },
    { "\\u00C4\\uFB03n",          "A\\u0308\\uFB03n",           "\\u00C4\\uFB03n"     },

    { "Henry IV",               "Henry IV",                 "Henry IV"          },
    { "Henry \\u2163",           "Henry \\u2163",             "Henry \\u2163"      },

    { "\\u30AC",                 "\\u30AB\\u3099",             "\\u30AC"            }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",           "\\u30AB\\u3099",             "\\u30AC"            }, /*ka + ten*/
    { "\\uFF76\\uFF9E",           "\\uFF76\\uFF9E",             "\\uFF76\\uFF9E"      }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",           "\\u30AB\\uFF9E",             "\\u30AB\\uFF9E"      }, /* ka + hw_ten*/
    { "\\uFF76\\u3099",           "\\uFF76\\u3099",             "\\uFF76\\u3099"      }, /* hw_ka + ten*/
    { "A\\u0300\\u0316",          "A\\u0316\\u0300",            "\\u00C0\\u0316"      }, /* A grave_above grave_below (marks get reordered) */
    { "", "", "" }
};
107
/*
 * Compatibility normalization test data, laid out like canonTests:
 * input, expected compatibility decomposition (column 1) and expected
 * compatibility composition (column 2).  Used with UNORM_NFKD and UNORM_NFKC.
 */
static const char* const compatTests[][3] = {
    /* Input*/                        /*Decomposed */                /*Composed*/
    { "cat",                        "cat",                     "cat"                },

    { "\\uFB4f",                     "\\u05D0\\u05DC",            "\\u05D0\\u05DC"       }, /* Alef-Lamed vs. Alef, Lamed*/

    { "\\u00C4ffin",                 "A\\u0308ffin",             "\\u00C4ffin"         },
    { "\\u00C4\\uFB03n",              "A\\u0308ffin",             "\\u00C4ffin"         }, /* ffi ligature -> f + f + i*/

    { "Henry IV",                   "Henry IV",                "Henry IV"           },
    { "Henry \\u2163",               "Henry IV",                "Henry IV"           },

    { "\\u30AC",                     "\\u30AB\\u3099",            "\\u30AC"             }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",               "\\u30AB\\u3099",            "\\u30AC"             }, /*ka + ten*/

    { "\\uFF76\\u3099",               "\\u30AB\\u3099",            "\\u30AC"             }, /* hw_ka + ten*/

    /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    { "\\uFF76\\uFF9E",               "\\u30AB\\u3099",            "\\u30AC"             }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",               "\\u30AB\\u3099",            "\\u30AC"             }, /* ka + hw_ten*/
    { "", "", "" }
};
130
/*
 * FCD test data: input and expected UNORM_FCD output (column 1).
 * The third column is NULL because TestNormCases() only reads column 2 for
 * the composing modes (UNORM_NFC / UNORM_NFKC), never for UNORM_FCD.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },  /* D-caron + cedilla */
    { "\\u010e", "\\u010e", NULL }                  /* D-caron */
};
136
137 void addNormTest(TestNode** root);
138
/*
 * Registers one test function with addTest() under the path
 * "tsnorm/cnormtst/<function name>".  The path is assembled at compile time
 * from the function identifier, so the name and the path cannot drift apart.
 */
#define CNORMTST_REGISTER(fn) addTest(root, &fn, "tsnorm/cnormtst/" #fn)

/*
 * Adds all normalization tests of this file to the test tree.
 *
 * root: handle of the test tree, passed through to addTest().
 */
void addNormTest(TestNode** root)
{
    CNORMTST_REGISTER(TestAPI);
    CNORMTST_REGISTER(TestDecomp);
    CNORMTST_REGISTER(TestCompatDecomp);
    CNORMTST_REGISTER(TestCanonDecompCompose);
    CNORMTST_REGISTER(TestCompatDecompCompose);
    CNORMTST_REGISTER(TestFCD);
    CNORMTST_REGISTER(TestNull);
    CNORMTST_REGISTER(TestQuickCheck);
    CNORMTST_REGISTER(TestQuickCheckPerCP);
    CNORMTST_REGISTER(TestIsNormalized);
    CNORMTST_REGISTER(TestCheckFCD);
    CNORMTST_REGISTER(TestNormCoverage);
    CNORMTST_REGISTER(TestConcatenate);
    CNORMTST_REGISTER(TestNextPrevious);
    CNORMTST_REGISTER(TestFCNFKCClosure);
    CNORMTST_REGISTER(TestComposition);
    CNORMTST_REGISTER(TestGetDecomposition);
    CNORMTST_REGISTER(TestGetRawDecomposition);
    CNORMTST_REGISTER(TestAppendRestoreMiddle);
    CNORMTST_REGISTER(TestGetEasyToUseInstance);
    CNORMTST_REGISTER(TestAPICoverage);
}

#undef CNORMTST_REGISTER
163
/*
 * Printable names for log messages, indexed directly by a UNormalizationMode
 * value (see modeStrings[mode] in TestNormCases()).  Entry 0 is a placeholder.
 * NOTE(review): the order presumably mirrors the UNormalizationMode enum in
 * unorm.h (UNORM_NONE == 1) - confirm if the enum ever changes.
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
174
/*
 * Runs every row of a test table through unorm_normalize() in the given mode.
 *
 * mode:          normalization mode under test.
 * cases:         table of escaped strings; column 0 is the input, column 1 the
 *                expected decomposed form, column 2 the expected composed form.
 * lengthOfCases: number of rows in the table.
 *
 * For each row the output length is preflighted twice (explicit source length
 * and NUL-terminated source) and the two lengths must agree.  The string is
 * then normalized for real with both source-length conventions and compared
 * against the expected column via assertEqual().
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    UChar normalized[16];
    int32_t expectedColumn;
    int32_t row;

    /* Composing modes are checked against column 2, all others against column 1. */
    if(mode==UNORM_NFC || mode==UNORM_NFKC) {
        expectedColumn=2;
    } else {
        expectedColumn=1;
    }

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(row=0; row<lengthOfCases; ++row) {
        UErrorCode status=U_ZERO_ERROR;
        UErrorCode nulStatus=U_ZERO_ERROR;
        UChar *source=CharsToUChars(cases[row][0]);
        const char *expected=cases[row][expectedColumn];
        int32_t preflightLen;
        int32_t gotLen;

        /* Preflight with an explicit length and with a NUL-terminated source. */
        preflightLen=unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        gotLen=unorm_normalize(source, -1, mode, 0, NULL, 0, &nulStatus);
        if(preflightLen!=gotLen) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)row, (int)preflightLen, (int)gotLen);
        }

        /* An overflow is the expected outcome of preflighting; clear it. */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status=U_ZERO_ERROR;
        }

        /* Real normalization, explicit source length. */
        gotLen=unorm_normalize(source, u_strlen(source), mode, 0,
                               normalized, UPRV_LENGTHOF(normalized), &status);
        if(!U_FAILURE(status) && preflightLen==gotLen) {
            assertEqual(normalized, expected, row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* Real normalization, NUL-terminated source (status carries over). */
        gotLen=unorm_normalize(source, -1, mode, 0,
                               normalized, UPRV_LENGTHOF(normalized), &status);
        if(!U_FAILURE(status) && preflightLen==gotLen) {
            assertEqual(normalized, expected, row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
214
TestDecomp()215 void TestDecomp() {
216 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
217 }
218
TestCompatDecomp()219 void TestCompatDecomp() {
220 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
221 }
222
TestCanonDecompCompose()223 void TestCanonDecompCompose() {
224 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
225 }
226
TestCompatDecompCompose()227 void TestCompatDecompCompose() {
228 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
229 }
230
TestFCD()231 void TestFCD() {
232 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
233 }
234
/*
 * Logs an error unless the NUL-terminated UTF-16 string `result` equals
 * `expected` once the latter has been converted with CharsToUChars().
 *
 * result:   actual output, NUL-terminated.
 * expected: expected text in the escaped form accepted by CharsToUChars().
 * index:    test case number, only used in the error message.
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *wanted = CharsToUChars(expected);
    const int32_t difference = u_strcmp(result, wanted);

    /* The converted copy is only needed for the comparison itself. */
    free(wanted);

    if(difference == 0) {
        return;
    }
    log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
            austrdup(result) );
}
244
/*
 * Normalizes a string that contains an embedded U+0000 and compares the
 * output with the expected code units.
 *
 * src, srcLen: input code units and their count (explicit length, so the
 *              embedded NUL is part of the input).
 * exp, expLen: expected output code units and their count.
 * mode:        normalization mode to apply.
 * name:        mode name used in log messages.
 *
 * On a failing status nothing is compared, because neither the returned
 * length nor the buffer contents can be trusted.  On a length mismatch only
 * the common prefix is compared, so neither exp[] nor result[] is read out of
 * bounds, and the final "OK" message is suppressed.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                           UChar *exp, int32_t expLen,
                           UNormalizationMode mode,
                           const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len, compareLen, i;
    UChar result[50];

    /* Pre-fill with U+FFFD so that untouched slots are recognizable. */
    for(i=0; i<UPRV_LENGTHOF(result); i++) {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        return;
    }
    if(len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
    }

    /* Never index past the shorter of the two arrays. */
    compareLen = (len < expLen) ? len : expLen;
    for(i=0; i<compareLen; i++) {
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    if(len == expLen) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
287
TestNull()288 void TestNull()
289 {
290
291 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
292 int32_t source_comp_len = 4;
293 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
294 int32_t expect_comp_len = 3;
295
296 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
297 int32_t source_dcmp_len = 3;
298 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
299 int32_t expect_dcmp_len = 5;
300
301 TestNull_check(source_comp,
302 source_comp_len,
303 expect_comp,
304 expect_comp_len,
305 UNORM_NFC,
306 "UNORM_NFC");
307
308 TestNull_check(source_dcmp,
309 source_dcmp_len,
310 expect_dcmp,
311 expect_dcmp_len,
312 UNORM_NFD,
313 "UNORM_NFD");
314
315 TestNull_check(source_comp,
316 source_comp_len,
317 expect_comp,
318 expect_comp_len,
319 UNORM_NFKC,
320 "UNORM_NFKC");
321
322
323 }
324
TestQuickCheckResultNO()325 static void TestQuickCheckResultNO()
326 {
327 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
328 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
329 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
330 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
331 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
332 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
333 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
334 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
335
336
337 const int SIZE = 10;
338
339 int count = 0;
340 UErrorCode error = U_ZERO_ERROR;
341
342 for (; count < SIZE; count ++)
343 {
344 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
345 UNORM_NO)
346 {
347 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
348 return;
349 }
350 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
351 UNORM_NO)
352 {
353 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
354 return;
355 }
356 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
357 UNORM_NO)
358 {
359 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
360 return;
361 }
362 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
363 UNORM_NO)
364 {
365 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
366 return;
367 }
368 }
369 }
370
371
TestQuickCheckResultYES()372 static void TestQuickCheckResultYES()
373 {
374 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
375 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
376 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
377 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
378 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
379 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
380 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
381 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
382
383 const int SIZE = 10;
384 int count = 0;
385 UErrorCode error = U_ZERO_ERROR;
386
387 UChar cp = 0;
388 while (cp < 0xA0)
389 {
390 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
391 {
392 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
393 return;
394 }
395 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
396 UNORM_YES)
397 {
398 log_err("ERROR in NFC quick check at U+%04x\n", cp);
399 return;
400 }
401 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
402 {
403 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
404 return;
405 }
406 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
407 UNORM_YES)
408 {
409 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
410 return;
411 }
412 cp ++;
413 }
414
415 for (; count < SIZE; count ++)
416 {
417 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
418 UNORM_YES)
419 {
420 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
421 return;
422 }
423 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
424 != UNORM_YES)
425 {
426 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
427 return;
428 }
429 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
430 UNORM_YES)
431 {
432 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
433 return;
434 }
435 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
436 UNORM_YES)
437 {
438 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
439 return;
440 }
441 }
442 }
443
TestQuickCheckResultMAYBE()444 static void TestQuickCheckResultMAYBE()
445 {
446 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
447 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
448 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
449 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
450
451
452 const int SIZE = 10;
453
454 int count = 0;
455 UErrorCode error = U_ZERO_ERROR;
456
457 /* NFD and NFKD does not have any MAYBE codepoints */
458 for (; count < SIZE; count ++)
459 {
460 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
461 UNORM_MAYBE)
462 {
463 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
464 return;
465 }
466 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
467 UNORM_MAYBE)
468 {
469 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
470 return;
471 }
472 }
473 }
474
TestQuickCheckStringResult()475 static void TestQuickCheckStringResult()
476 {
477 int count;
478 UChar *d = NULL;
479 UChar *c = NULL;
480 UErrorCode error = U_ZERO_ERROR;
481
482 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
483 {
484 d = CharsToUChars(canonTests[count][1]);
485 c = CharsToUChars(canonTests[count][2]);
486 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
487 UNORM_YES)
488 {
489 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
490 free(d); free(c);
491 return;
492 }
493
494 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
495 UNORM_NO)
496 {
497 log_err("ERROR in NFC quick check for string at count %d\n", count);
498 free(d); free(c);
499 return;
500 }
501
502 free(d);
503 free(c);
504 }
505
506 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
507 {
508 d = CharsToUChars(compatTests[count][1]);
509 c = CharsToUChars(compatTests[count][2]);
510 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
511 UNORM_YES)
512 {
513 log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
514 free(d); free(c);
515 return;
516 }
517
518 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
519 UNORM_YES)
520 {
521 log_err("ERROR in NFKC quick check for string at count %d\n", count);
522 free(d); free(c);
523 return;
524 }
525
526 free(d);
527 free(c);
528 }
529 }
530
/*
 * Entry point for the unorm_quickCheck() tests: runs the NO, YES and MAYBE
 * single-code-point checks, then the whole-string checks.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
538
539 /*
540 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
541 * normalized, and some that are not.
542 * Here we pick some specific cases and test the C API.
543 */
TestIsNormalized(void)544 static void TestIsNormalized(void) {
545 static const UChar notNFC[][8]={ /* strings that are not in NFC */
546 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
547 { 0xfb1d, 0 }, /* excluded from composition */
548 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
549 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
550 };
551 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
552 { 0x1100, 0x1161, 0 }, /* Jamo compose */
553 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
554 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
555 };
556
557 int32_t i;
558 UErrorCode errorCode;
559
560 /* API test */
561
562 /* normal case with length>=0 (length -1 used for special cases below) */
563 errorCode=U_ZERO_ERROR;
564 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
565 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
566 }
567
568 /* incoming U_FAILURE */
569 errorCode=U_TRUNCATED_CHAR_FOUND;
570 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
571 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
572 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
573 }
574
575 /* NULL source */
576 errorCode=U_ZERO_ERROR;
577 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
578 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
579 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
580 }
581
582 /* bad length */
583 errorCode=U_ZERO_ERROR;
584 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
585 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
586 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
587 }
588
589 /* specific cases */
590 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
591 errorCode=U_ZERO_ERROR;
592 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
593 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
594 }
595 errorCode=U_ZERO_ERROR;
596 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
597 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
598 }
599 }
600 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
601 errorCode=U_ZERO_ERROR;
602 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
603 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
604 }
605 }
606 }
607
TestCheckFCD()608 void TestCheckFCD()
609 {
610 UErrorCode status = U_ZERO_ERROR;
611 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
612 0x0A};
613 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
614 0x02B9, 0x0314, 0x0315, 0x0316};
615 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
616 0x0050, 0x0730, 0x09EE, 0x1E10};
617
618 static const UChar datastr[][5] =
619 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
620 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
621 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
622 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
623 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
624
625 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
626 0x6a,
627 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
628 0xea,
629 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
630 0x0307, 0x0308, 0x0309, 0x030a,
631 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
632 0x0327, 0x0328, 0x0329, 0x032a,
633 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
634 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
635
636 int count = 0;
637
638 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
639 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
640 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
641 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
642 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
643 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
644
645 if (U_FAILURE(status))
646 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
647
648 while (count < 4)
649 {
650 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
651 if (U_FAILURE(status)) {
652 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data set %d - (Are you missing data?)\n", count);
653 break;
654 }
655 else {
656 if (result[count] != fcdresult) {
657 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
658 result[count]);
659 }
660 }
661 count ++;
662 }
663
664 /* random checks of long strings */
665 status = U_ZERO_ERROR;
666 srand((unsigned)time( NULL ));
667
668 for (count = 0; count < 50; count ++)
669 {
670 int size = 0;
671 UNormalizationCheckResult testresult = UNORM_YES;
672 UChar data[20];
673 UChar norm[100];
674 UChar nfd[100];
675 int normsize = 0;
676 int nfdsize = 0;
677
678 while (size != 19) {
679 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
680 log_verbose("0x%x", data[size]);
681 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
682 norm + normsize, 100 - normsize, &status);
683 if (U_FAILURE(status)) {
684 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at data generation - (Are you missing data?)\n");
685 break;
686 }
687 size ++;
688 }
689 log_verbose("\n");
690
691 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
692 nfd, 100, &status);
693 if (U_FAILURE(status)) {
694 log_data_err("unorm_quickCheck(FCD) failed: exception occurred at normalized data generation - (Are you missing data?)\n");
695 }
696
697 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
698 testresult = UNORM_NO;
699 }
700 if (testresult == UNORM_YES) {
701 log_verbose("result UNORM_YES\n");
702 }
703 else {
704 log_verbose("result UNORM_NO\n");
705 }
706
707 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
708 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
709 }
710 }
711 }
712
713 static void
TestAPI()714 TestAPI() {
715 static const UChar in[]={ 0x68, 0xe4 };
716 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
717 UErrorCode errorCode;
718 int32_t length;
719
720 /* try preflighting */
721 errorCode=U_ZERO_ERROR;
722 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
723 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
724 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
725 return;
726 }
727
728 errorCode=U_ZERO_ERROR;
729 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
730 if(U_FAILURE(errorCode)) {
731 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
732 return;
733 }
734 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
735 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
736 return;
737 }
738 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
739 if(U_FAILURE(errorCode)) {
740 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
741 return;
742 }
743 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
744 if(U_FAILURE(errorCode)) {
745 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
746 return;
747 }
748 }
749
/* test cases to improve test code coverage */
/*
 * Code points used by TestNormCoverage().  The HANGUL_K_* constants are the
 * compatibility forms of the Jamo constants that follow them; the MUSICAL_*
 * constants lie above U+FFFF and are written as surrogate pairs with
 * U16_LEAD()/U16_TRAIL().
 */
enum {
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    HANGUL_AC00=0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
768
769 static void
TestNormCoverage()770 TestNormCoverage() {
771 UChar input[1000], expect[1000], output[1000];
772 UErrorCode errorCode;
773 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
774
775 /* create a long and nasty string with NFKC-unsafe characters */
776 inLength=0;
777
778 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
779 input[inLength++]=HANGUL_KIYEOK;
780 input[inLength++]=HANGUL_WEO;
781 input[inLength++]=HANGUL_KIYEOK_SIOS;
782
783 input[inLength++]=HANGUL_KIYEOK;
784 input[inLength++]=HANGUL_WEO;
785 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
786
787 input[inLength++]=HANGUL_KIYEOK;
788 input[inLength++]=HANGUL_K_WEO;
789 input[inLength++]=HANGUL_KIYEOK_SIOS;
790
791 input[inLength++]=HANGUL_KIYEOK;
792 input[inLength++]=HANGUL_K_WEO;
793 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
794
795 input[inLength++]=HANGUL_K_KIYEOK;
796 input[inLength++]=HANGUL_WEO;
797 input[inLength++]=HANGUL_KIYEOK_SIOS;
798
799 input[inLength++]=HANGUL_K_KIYEOK;
800 input[inLength++]=HANGUL_WEO;
801 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
802
803 input[inLength++]=HANGUL_K_KIYEOK;
804 input[inLength++]=HANGUL_K_WEO;
805 input[inLength++]=HANGUL_KIYEOK_SIOS;
806
807 input[inLength++]=HANGUL_K_KIYEOK;
808 input[inLength++]=HANGUL_K_WEO;
809 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
810
811 /* Hangul LV with normal/compatibility Jamo T */
812 input[inLength++]=HANGUL_AC00;
813 input[inLength++]=HANGUL_KIYEOK_SIOS;
814
815 input[inLength++]=HANGUL_AC00;
816 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
817
818 /* compatibility Jamo L, V */
819 input[inLength++]=HANGUL_K_KIYEOK;
820 input[inLength++]=HANGUL_K_WEO;
821
822 hangulPrefixLength=inLength;
823
824 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
825 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
826 for(i=0; i<200; ++i) {
827 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
828 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
829 input[inLength++]=U16_LEAD(MUSICAL_STEM);
830 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
831 }
832
833 /* (compatibility) Jamo L, T do not compose */
834 input[inLength++]=HANGUL_K_KIYEOK;
835 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
836
837 /* quick checks */
838 errorCode=U_ZERO_ERROR;
839 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
840 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
841 }
842 errorCode=U_ZERO_ERROR;
843 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
844 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
845 }
846 errorCode=U_ZERO_ERROR;
847 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
848 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
849 }
850 errorCode=U_ZERO_ERROR;
851 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
852 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
853 }
854 errorCode=U_ZERO_ERROR;
855 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
856 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
857 }
858
859 /* NFKC */
860 expectLength=0;
861 expect[expectLength++]=HANGUL_SYLLABLE;
862
863 expect[expectLength++]=HANGUL_SYLLABLE;
864
865 expect[expectLength++]=HANGUL_SYLLABLE;
866
867 expect[expectLength++]=HANGUL_SYLLABLE;
868
869 expect[expectLength++]=HANGUL_SYLLABLE;
870
871 expect[expectLength++]=HANGUL_SYLLABLE;
872
873 expect[expectLength++]=HANGUL_SYLLABLE;
874
875 expect[expectLength++]=HANGUL_SYLLABLE;
876
877 expect[expectLength++]=HANGUL_AC00+3;
878
879 expect[expectLength++]=HANGUL_AC00+3;
880
881 expect[expectLength++]=HANGUL_AC00+14*28;
882
883 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
884 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
885 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
886 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
887 for(i=0; i<200; ++i) {
888 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
889 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
890 }
891 for(i=0; i<200; ++i) {
892 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
893 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
894 }
895
896 expect[expectLength++]=HANGUL_KIYEOK;
897 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
898
899 /* try destination overflow first */
900 errorCode=U_ZERO_ERROR;
901 preflightLength=unorm_normalize(input, inLength,
902 UNORM_NFKC, 0,
903 output, 100, /* too short */
904 &errorCode);
905 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
906 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
907 }
908
909 /* real NFKC */
910 errorCode=U_ZERO_ERROR;
911 length=unorm_normalize(input, inLength,
912 UNORM_NFKC, 0,
913 output, UPRV_LENGTHOF(output),
914 &errorCode);
915 if(U_FAILURE(errorCode)) {
916 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
917 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
918 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
919 for(i=0; i<length; ++i) {
920 if(output[i]!=expect[i]) {
921 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
922 break;
923 }
924 }
925 }
926 if(length!=preflightLength) {
927 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
928 }
929
930 /* FCD */
931 u_memcpy(expect, input, hangulPrefixLength);
932 expectLength=hangulPrefixLength;
933
934 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
935 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
936 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
937 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
938 for(i=0; i<200; ++i) {
939 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
940 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
941 }
942 for(i=0; i<200; ++i) {
943 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
944 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
945 }
946
947 expect[expectLength++]=HANGUL_K_KIYEOK;
948 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
949
950 errorCode=U_ZERO_ERROR;
951 length=unorm_normalize(input, inLength,
952 UNORM_FCD, 0,
953 output, UPRV_LENGTHOF(output),
954 &errorCode);
955 if(U_FAILURE(errorCode)) {
956 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
957 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
958 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
959 for(i=0; i<length; ++i) {
960 if(output[i]!=expect[i]) {
961 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
962 break;
963 }
964 }
965 }
966 }
967
968 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
969 static void
TestConcatenate(void)970 TestConcatenate(void) {
971 /* "re + 'sume'" */
972 static const UChar
973 left[]={
974 0x72, 0x65, 0
975 },
976 right[]={
977 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
978 },
979 expect[]={
980 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
981 };
982
983 UChar buffer[100];
984 UErrorCode errorCode;
985 int32_t length;
986
987 /* left with length, right NUL-terminated */
988 errorCode=U_ZERO_ERROR;
989 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
990 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
991 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
992 }
993
994 /* preflighting */
995 errorCode=U_ZERO_ERROR;
996 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
997 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
998 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
999 }
1000
1001 buffer[2]=0x5555;
1002 errorCode=U_ZERO_ERROR;
1003 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1004 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1005 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1006 }
1007
1008 /* enter with U_FAILURE */
1009 buffer[2]=0xaaaa;
1010 errorCode=U_UNEXPECTED_TOKEN;
1011 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1012 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1013 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1014 }
1015
1016 /* illegal arguments */
1017 buffer[2]=0xaaaa;
1018 errorCode=U_ZERO_ERROR;
1019 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1020 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1021 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1022 }
1023
1024 errorCode=U_ZERO_ERROR;
1025 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1026 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1027 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1028 }
1029 }
1030
/*
 * Separator code unit (0x2b) placed between the expected output pieces in the
 * iteration test tables; _testIter() scans for it to find piece boundaries.
 */
enum { _PLUS = 0x2b };
1034
/*
 * Mode names for log messages, indexed by the UNormalizationMode value
 * (see the _modeString[mode] lookups in _testIter()).
 * NOTE(review): slot 0 ("0") looks like a placeholder for an unused enum
 * value - confirm against the UNormalizationMode definition.
 */
static const char *const _modeString[UNORM_MODE_COUNT]={
    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
};
1038
1039 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1040 _testIter(const UChar *src, int32_t srcLength,
1041 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1042 const UChar *out, int32_t outLength,
1043 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1044 UChar buffer[4];
1045 const UChar *expect, *outLimit, *in;
1046 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1047 UErrorCode errorCode;
1048 UBool neededToNormalize, expectNeeded;
1049
1050 errorCode=U_ZERO_ERROR;
1051 outLimit=out+outLength;
1052 if(forward) {
1053 expect=out;
1054 i=index=0;
1055 } else {
1056 expect=outLimit;
1057 i=srcIndexesLength-2;
1058 index=srcLength;
1059 }
1060
1061 for(;;) {
1062 prevIndex=index;
1063 if(forward) {
1064 if(!iter->hasNext(iter)) {
1065 return;
1066 }
1067 length=unorm_next(iter,
1068 buffer, UPRV_LENGTHOF(buffer),
1069 mode, 0,
1070 (UBool)(out!=NULL), &neededToNormalize,
1071 &errorCode);
1072 expectIndex=srcIndexes[i+1];
1073 in=src+prevIndex;
1074 inLength=expectIndex-prevIndex;
1075
1076 if(out!=NULL) {
1077 /* get output piece from between plus signs */
1078 expectLength=0;
1079 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1080 ++expectLength;
1081 }
1082 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1083 } else {
1084 expect=in;
1085 expectLength=inLength;
1086 expectNeeded=false;
1087 }
1088 } else {
1089 if(!iter->hasPrevious(iter)) {
1090 return;
1091 }
1092 length=unorm_previous(iter,
1093 buffer, UPRV_LENGTHOF(buffer),
1094 mode, 0,
1095 (UBool)(out!=NULL), &neededToNormalize,
1096 &errorCode);
1097 expectIndex=srcIndexes[i];
1098 in=src+expectIndex;
1099 inLength=prevIndex-expectIndex;
1100
1101 if(out!=NULL) {
1102 /* get output piece from between plus signs */
1103 expectLength=0;
1104 while(expect!=out && expect[-1]!=_PLUS) {
1105 ++expectLength;
1106 --expect;
1107 }
1108 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1109 } else {
1110 expect=in;
1111 expectLength=inLength;
1112 expectNeeded=false;
1113 }
1114 }
1115 index=iter->getIndex(iter, UITER_CURRENT);
1116
1117 if(U_FAILURE(errorCode)) {
1118 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1119 forward, _modeString[mode], i, u_errorName(errorCode));
1120 return;
1121 }
1122 if(expectIndex!=index) {
1123 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1124 forward, _modeString[mode], i, index, expectIndex);
1125 return;
1126 }
1127 if(expectLength!=length) {
1128 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1129 forward, _modeString[mode], i, length, expectLength);
1130 return;
1131 }
1132 if(0!=u_memcmp(expect, buffer, length)) {
1133 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1134 forward, _modeString[mode], i);
1135 return;
1136 }
1137 if(neededToNormalize!=expectNeeded) {
1138 }
1139
1140 if(forward) {
1141 expect+=expectLength+1; /* go after the + */
1142 ++i;
1143 } else {
1144 --expect; /* go before the + */
1145 --i;
1146 }
1147 }
1148 }
1149
1150 static void
TestNextPrevious()1151 TestNextPrevious() {
1152 static const UChar
1153 src[]={ /* input string */
1154 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1155 },
1156 nfd[]={ /* + separates expected output pieces */
1157 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1158 },
1159 nfkd[]={
1160 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1161 },
1162 nfc[]={
1163 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1164 },
1165 nfkc[]={
1166 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1167 },
1168 fcd[]={
1169 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1170 };
1171
1172 /* expected iterator indexes in the source string for each iteration piece */
1173 static const int32_t
1174 nfdIndexes[]={
1175 0, 1, 2, 5, 6, 7
1176 },
1177 nfkdIndexes[]={
1178 0, 1, 2, 5, 6, 7
1179 },
1180 nfcIndexes[]={
1181 0, 1, 2, 5, 6, 7
1182 },
1183 nfkcIndexes[]={
1184 0, 1, 2, 5, 7
1185 },
1186 fcdIndexes[]={
1187 0, 1, 2, 5, 6, 7
1188 };
1189
1190 UCharIterator iter;
1191
1192 UChar buffer[4];
1193 int32_t length;
1194
1195 UBool neededToNormalize;
1196 UErrorCode errorCode;
1197
1198 uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1199
1200 /* test iteration with doNormalize */
1201 iter.index=0;
1202 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, true, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1203 iter.index=0;
1204 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, true, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1205 iter.index=0;
1206 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, true, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1207 iter.index=0;
1208 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, true, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1209 iter.index=0;
1210 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, true, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1211
1212 iter.index=iter.length;
1213 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, false, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1214 iter.index=iter.length;
1215 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, false, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1216 iter.index=iter.length;
1217 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, false, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1218 iter.index=iter.length;
1219 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, false, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1220 iter.index=iter.length;
1221 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, false, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1222
1223 /* test iteration without doNormalize */
1224 iter.index=0;
1225 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, true, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1226 iter.index=0;
1227 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, true, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1228 iter.index=0;
1229 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, true, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1230 iter.index=0;
1231 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, true, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1232 iter.index=0;
1233 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, true, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1234
1235 iter.index=iter.length;
1236 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, false, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1237 iter.index=iter.length;
1238 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, false, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1239 iter.index=iter.length;
1240 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, false, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1241 iter.index=iter.length;
1242 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, false, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1243 iter.index=iter.length;
1244 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, false, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1245
1246 /* try without neededToNormalize */
1247 errorCode=U_ZERO_ERROR;
1248 buffer[0]=5;
1249 iter.index=1;
1250 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1251 UNORM_NFD, 0, true, NULL,
1252 &errorCode);
1253 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1254 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1255 return;
1256 }
1257
1258 /* preflight */
1259 neededToNormalize=9;
1260 iter.index=1;
1261 length=unorm_next(&iter, NULL, 0,
1262 UNORM_NFD, 0, true, &neededToNormalize,
1263 &errorCode);
1264 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=false || length!=2) {
1265 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1266 return;
1267 }
1268
1269 errorCode=U_ZERO_ERROR;
1270 buffer[0]=buffer[1]=5;
1271 neededToNormalize=9;
1272 iter.index=1;
1273 length=unorm_next(&iter, buffer, 1,
1274 UNORM_NFD, 0, true, &neededToNormalize,
1275 &errorCode);
1276 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=false || length!=2 || buffer[1]!=5) {
1277 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1278 return;
1279 }
1280
1281 /* no iterator */
1282 errorCode=U_ZERO_ERROR;
1283 buffer[0]=buffer[1]=5;
1284 neededToNormalize=9;
1285 iter.index=1;
1286 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1287 UNORM_NFD, 0, true, &neededToNormalize,
1288 &errorCode);
1289 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1290 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1291 return;
1292 }
1293
1294 /* illegal mode */
1295 buffer[0]=buffer[1]=5;
1296 neededToNormalize=9;
1297 iter.index=1;
1298 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1299 (UNormalizationMode)0, 0, true, &neededToNormalize,
1300 &errorCode);
1301 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1302 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1303 return;
1304 }
1305
1306 /* error coming in */
1307 errorCode=U_MISPLACED_QUANTIFIER;
1308 buffer[0]=5;
1309 iter.index=1;
1310 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1311 UNORM_NFD, 0, true, NULL,
1312 &errorCode);
1313 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1314 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1315 return;
1316 }
1317 }
1318
1319 static void
TestFCNFKCClosure(void)1320 TestFCNFKCClosure(void) {
1321 static const struct {
1322 UChar32 c;
1323 const UChar s[6];
1324 } tests[]={
1325 { 0x00C4, { 0 } },
1326 { 0x00E4, { 0 } },
1327 { 0x037A, { 0x0020, 0x03B9, 0 } },
1328 { 0x03D2, { 0x03C5, 0 } },
1329 { 0x20A8, { 0x0072, 0x0073, 0 } },
1330 { 0x210B, { 0x0068, 0 } },
1331 { 0x210C, { 0x0068, 0 } },
1332 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1333 { 0x2122, { 0x0074, 0x006D, 0 } },
1334 { 0x2128, { 0x007A, 0 } },
1335 { 0x1D5DB, { 0x0068, 0 } },
1336 { 0x1D5ED, { 0x007A, 0 } },
1337 { 0x0061, { 0 } }
1338 };
1339
1340 UChar buffer[8];
1341 UErrorCode errorCode;
1342 int32_t i, length;
1343
1344 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1345 errorCode=U_ZERO_ERROR;
1346 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1347 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1348 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1349 }
1350 }
1351
1352 /* error handling */
1353 errorCode=U_ZERO_ERROR;
1354 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1355 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1356 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1357 }
1358
1359 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1360 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1361 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1362 }
1363 }
1364
1365 static void
TestQuickCheckPerCP()1366 TestQuickCheckPerCP() {
1367 UErrorCode errorCode;
1368 UChar32 c, lead, trail;
1369 UChar s[U16_MAX_LENGTH], nfd[16];
1370 int32_t length, lccc1, lccc2, tccc1, tccc2;
1371 int32_t qc1, qc2;
1372
1373 if(
1374 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1375 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1376 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1377 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1378 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1379 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1380 ) {
1381 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1382 }
1383
1384 /*
1385 * compare the quick check property values for some code points
1386 * to the quick check results for checking same-code point strings
1387 */
1388 errorCode=U_ZERO_ERROR;
1389 c=0;
1390 while(c<0x110000) {
1391 length=0;
1392 U16_APPEND_UNSAFE(s, length, c);
1393
1394 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1395 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1396 if(qc1!=qc2) {
1397 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1398 }
1399
1400 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1401 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1402 if(qc1!=qc2) {
1403 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1404 }
1405
1406 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1407 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1408 if(qc1!=qc2) {
1409 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1410 }
1411
1412 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1413 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1414 if(qc1!=qc2) {
1415 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1416 }
1417
1418 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1419 if (U_FAILURE(errorCode)) {
1420 log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1421 break;
1422 }
1423
1424 /* length-length == 0 is used to get around a compiler warning. */
1425 U16_GET(nfd, 0, length-length, length, lead);
1426 U16_GET(nfd, 0, length-1, length, trail);
1427
1428 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1429 lccc2=u_getCombiningClass(lead);
1430 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1431 tccc2=u_getCombiningClass(trail);
1432
1433 if(lccc1!=lccc2) {
1434 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1435 lccc1, lccc2, c);
1436 }
1437 if(tccc1!=tccc2) {
1438 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1439 tccc1, tccc2, c);
1440 }
1441
1442 /* skip some code points */
1443 c=(20*c)/19+1;
1444 }
1445 }
1446
1447 static void
TestComposition(void)1448 TestComposition(void) {
1449 static const struct {
1450 UNormalizationMode mode;
1451 uint32_t options;
1452 UChar input[12];
1453 UChar expect[12];
1454 } cases[]={
1455 /*
1456 * special cases for UAX #15 bug
1457 * see Unicode Corrigendum #5: Normalization Idempotency
1458 * at http://unicode.org/versions/corrigendum5.html
1459 * (was Public Review Issue #29)
1460 */
1461 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1462 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1463 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1464 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1465
1466 /* TODO: add test cases for UNORM_FCC here (j2151) */
1467 };
1468
1469 UChar output[16];
1470 UErrorCode errorCode;
1471 int32_t i, length;
1472
1473 for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1474 errorCode=U_ZERO_ERROR;
1475 length=unorm_normalize(
1476 cases[i].input, -1,
1477 cases[i].mode, cases[i].options,
1478 output, UPRV_LENGTHOF(output),
1479 &errorCode);
1480 if( U_FAILURE(errorCode) ||
1481 length!=u_strlen(cases[i].expect) ||
1482 0!=u_memcmp(output, cases[i].expect, length)
1483 ) {
1484 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1485 }
1486 }
1487 }
1488
1489 static void
TestGetDecomposition()1490 TestGetDecomposition() {
1491 UChar decomp[32];
1492 int32_t length;
1493
1494 UErrorCode errorCode=U_ZERO_ERROR;
1495 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1496 if(U_FAILURE(errorCode)) {
1497 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1498 return;
1499 }
1500
1501 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1502 if(U_FAILURE(errorCode) || length>=0) {
1503 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1504 }
1505 errorCode=U_ZERO_ERROR;
1506 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1507 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1508 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1509 }
1510 errorCode=U_ZERO_ERROR;
1511 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1512 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1513 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1514 }
1515 errorCode=U_ZERO_ERROR;
1516 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1517 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1518 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1519 }
1520 errorCode=U_ZERO_ERROR;
1521 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1522 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1523 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1524 }
1525 errorCode=U_ZERO_ERROR;
1526 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1527 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1528 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1529 }
1530 }
1531
1532 static void
TestGetRawDecomposition()1533 TestGetRawDecomposition() {
1534 UChar decomp[32];
1535 int32_t length;
1536
1537 UErrorCode errorCode=U_ZERO_ERROR;
1538 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1539 if(U_FAILURE(errorCode)) {
1540 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1541 return;
1542 }
1543 /*
1544 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1545 * without recursive decomposition.
1546 */
1547
1548 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1549 if(U_FAILURE(errorCode) || length>=0) {
1550 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1551 }
1552 errorCode=U_ZERO_ERROR;
1553 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1554 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1555 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1556 }
1557 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1558 errorCode=U_ZERO_ERROR;
1559 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1560 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1561 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1562 }
1563 /* U+212B ANGSTROM SIGN */
1564 errorCode=U_ZERO_ERROR;
1565 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1566 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1567 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1568 }
1569 errorCode=U_ZERO_ERROR;
1570 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1571 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1572 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1573 }
1574 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1575 errorCode=U_ZERO_ERROR;
1576 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1577 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1578 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1579 }
1580 errorCode=U_ZERO_ERROR;
1581 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1582 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1583 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1584 }
1585 errorCode=U_ZERO_ERROR;
1586 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1587 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1588 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1589 }
1590 errorCode=U_ZERO_ERROR;
1591 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1592 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1593 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1594 }
1595 }
1596
1597 static void
TestAppendRestoreMiddle()1598 TestAppendRestoreMiddle() {
1599 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1600 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1601 /* NFC: C5 is 'A with ring above' */
1602 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1603 int32_t length;
1604 UErrorCode errorCode=U_ZERO_ERROR;
1605 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1606 if(U_FAILURE(errorCode)) {
1607 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1608 return;
1609 }
1610 /*
1611 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1612 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1613 * still fits into a[] but the full result still overflows this capacity.
1614 * (Let it modify the destination buffer before reallocating internally.)
1615 */
1616 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1617 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1618 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1619 return;
1620 }
1621 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1622 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1623 log_err("unorm2_append(overflow) modified the first string\n");
1624 return;
1625 }
1626 errorCode=U_ZERO_ERROR;
1627 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1628 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1629 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1630 return;
1631 }
1632 }
1633
1634 static void
TestGetEasyToUseInstance()1635 TestGetEasyToUseInstance() {
1636 static const UChar in[]={
1637 0xA0, /* -> <noBreak> 0020 */
1638 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1639 };
1640 UChar out[32];
1641 int32_t length;
1642
1643 UErrorCode errorCode=U_ZERO_ERROR;
1644 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1645 if(U_FAILURE(errorCode)) {
1646 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1647 return;
1648 }
1649 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1650 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1651 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1652 (int)length, u_errorName(errorCode));
1653 }
1654
1655 errorCode=U_ZERO_ERROR;
1656 n2=unorm2_getNFDInstance(&errorCode);
1657 if(U_FAILURE(errorCode)) {
1658 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1659 return;
1660 }
1661 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1662 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1663 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1664 (int)length, u_errorName(errorCode));
1665 }
1666
1667 errorCode=U_ZERO_ERROR;
1668 n2=unorm2_getNFKCInstance(&errorCode);
1669 if(U_FAILURE(errorCode)) {
1670 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1671 return;
1672 }
1673 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1674 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1675 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1676 (int)length, u_errorName(errorCode));
1677 }
1678
1679 errorCode=U_ZERO_ERROR;
1680 n2=unorm2_getNFKDInstance(&errorCode);
1681 if(U_FAILURE(errorCode)) {
1682 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1683 return;
1684 }
1685 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1686 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1687 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1688 (int)length, u_errorName(errorCode));
1689 }
1690
1691 errorCode=U_ZERO_ERROR;
1692 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1693 if(U_FAILURE(errorCode)) {
1694 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1695 return;
1696 }
1697 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1698 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1699 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1700 (int)length, u_errorName(errorCode));
1701 }
1702 }
1703
1704 static void
TestAPICoverage()1705 TestAPICoverage() {
1706 UErrorCode errorCode = U_ZERO_ERROR;
1707 const UNormalizer2 *n2 = unorm2_getNFDInstance(&errorCode);
1708 if (U_FAILURE(errorCode)) {
1709 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1710 return;
1711 }
1712
1713 if (!unorm2_hasBoundaryBefore(n2, u'C') || unorm2_hasBoundaryBefore(n2, 0x300)) {
1714 log_err("unorm2_hasBoundaryBefore() returns unexpected results\n");
1715 }
1716
1717 if (!unorm2_hasBoundaryAfter(n2, u'C') || unorm2_hasBoundaryAfter(n2, 0x300)) {
1718 log_err("unorm2_hasBoundaryAfter() returns unexpected results\n");
1719 }
1720
1721 if (!unorm2_isInert(n2, 0x50005) || unorm2_isInert(n2, 0x300)) {
1722 log_err("unorm2_isInert() returns unexpected results\n");
1723 }
1724
1725 errorCode = U_ZERO_ERROR;
1726 if (!unorm2_isNormalized(n2, u"c\u0327\u0300", 3, &errorCode) ||
1727 unorm2_isNormalized(n2, u"c\u0300\u0327", 3, &errorCode) ||
1728 U_FAILURE(errorCode)) {
1729 log_err("unorm2_isNormalized() returns unexpected results\n");
1730 }
1731
1732 errorCode = U_ZERO_ERROR;
1733 if (unorm2_quickCheck(n2, u"c\u0327\u0300", 3, &errorCode) == UNORM_NO ||
1734 unorm2_quickCheck(n2, u"c\u0300\u0327", 3, &errorCode) == UNORM_YES ||
1735 U_FAILURE(errorCode)) {
1736 log_err("unorm2_quickCheck() returns unexpected results\n");
1737 }
1738
1739 errorCode = U_ZERO_ERROR;
1740 if (unorm2_spanQuickCheckYes(n2, u"c\u0327\u0300", 3, &errorCode) != 3 ||
1741 unorm2_spanQuickCheckYes(n2, u"c\u0300\u0327", 3, &errorCode) != 1 ||
1742 U_FAILURE(errorCode)) {
1743 log_err("unorm2_spanQuickCheckYes() returns unexpected results\n");
1744 }
1745
1746 errorCode = U_ZERO_ERROR;
1747 UChar first[10] = { u'c', 0x300, 0, 0, 0, 0, 0, 0, 0, 0 };
1748 int32_t length = unorm2_normalizeSecondAndAppend(
1749 n2, first, 2, UPRV_LENGTHOF(first), u"\u0327d", 2, &errorCode);
1750 if (U_FAILURE(errorCode) || length != 4 || u_strcmp(first, u"c\u0327\u0300d") != 0) {
1751 log_err("unorm2_normalizeSecondAndAppend() returns unexpected results\n");
1752 }
1753 }
1754
1755 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1756