1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #if !UCONFIG_NO_NORMALIZATION
26
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33
/*
 * Forward declarations of the file-local (static) test functions.
 * Each of them is passed to addTest() by addNormTest() below.
 */
static void
TestAPI(void);

static void
TestNormCoverage(void);

static void
TestConcatenate(void);

static void
TestNextPrevious(void);

static void TestIsNormalized(void);

static void
TestFCNFKCClosure(void);

static void
TestQuickCheckPerCP(void);

static void
TestComposition(void);

static void
TestFCD(void);

static void
TestGetDecomposition(void);

static void
TestGetRawDecomposition(void);

static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
68
/*
 * Canonical normalization test data.
 * Each row is { input, expected decomposed form, expected composed form }:
 * TestDecomp() checks column 1 with UNORM_NFD and TestCanonDecompCompose()
 * checks column 2 with UNORM_NFC.
 * The strings are passed through CharsToUChars() before use, which presumably
 * expands the \uXXXX escapes (the doubled backslash keeps them literal in C).
 */
static const char* const canonTests[][3] = {
    /* Input*/                    /*Decomposed*/                /*Composed*/
    { "cat",                    "cat",                      "cat"                    },
    { "\\u00e0ardvark",            "a\\u0300ardvark",            "\\u00e0ardvark",           },

    { "\\u1e0a",                 "D\\u0307",                  "\\u1e0a"                 }, /* D-dot_above*/
    { "D\\u0307",                "D\\u0307",                  "\\u1e0a"                 }, /* D dot_above*/

    { "\\u1e0c\\u0307",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D-dot_below dot_above*/
    { "\\u1e0a\\u0323",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323",          "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D dot_below dot_above */

    { "\\u1e10\\u0307\\u0323",     "D\\u0327\\u0323\\u0307",      "\\u1e10\\u0323\\u0307"     }, /*D dot_below cedilla dot_above*/
    { "D\\u0307\\u0328\\u0323",    "D\\u0328\\u0323\\u0307",      "\\u1e0c\\u0328\\u0307"     }, /* D dot_above ogonek dot_below*/

    { "\\u1E14",                 "E\\u0304\\u0300",            "\\u1E14"                 }, /* E-macron-grave*/
    { "\\u0112\\u0300",           "E\\u0304\\u0300",            "\\u1E14"                 }, /* E-macron + grave*/
    { "\\u00c8\\u0304",           "E\\u0300\\u0304",            "\\u00c8\\u0304"           }, /* E-grave + macron*/

    { "\\u212b",                 "A\\u030a",                  "\\u00c5"                 }, /* angstrom_sign*/
    { "\\u00c5",                 "A\\u030a",                  "\\u00c5"                 }, /* A-ring*/

    { "\\u00C4ffin",             "A\\u0308ffin",              "\\u00C4ffin"             },
    { "\\u00C4\\uFB03n",          "A\\u0308\\uFB03n",           "\\u00C4\\uFB03n"          },

    { "Henry IV",               "Henry IV",                 "Henry IV"               },
    { "Henry \\u2163",           "Henry \\u2163",             "Henry \\u2163"           },

    { "\\u30AC",                 "\\u30AB\\u3099",             "\\u30AC"                 }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",           "\\u30AB\\u3099",             "\\u30AC"                 }, /*ka + ten*/
    { "\\uFF76\\uFF9E",           "\\uFF76\\uFF9E",             "\\uFF76\\uFF9E"           }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",           "\\u30AB\\uFF9E",             "\\u30AB\\uFF9E"           }, /* ka + hw_ten*/
    { "\\uFF76\\u3099",           "\\uFF76\\u3099",             "\\uFF76\\u3099"           }, /* hw_ka + ten*/
    { "A\\u0300\\u0316",          "A\\u0316\\u0300",            "\\u00C0\\u0316"           }, /* A grave_above grave_below: the two marks swap places in the expected forms */
    { "", "", "" }  /* empty string: all three forms are empty */
};
105
/*
 * Compatibility normalization test data.
 * Each row is { input, expected decomposed form, expected composed form }:
 * TestCompatDecomp() checks column 1 with UNORM_NFKD and
 * TestCompatDecompCompose() checks column 2 with UNORM_NFKC.
 * The strings are passed through CharsToUChars() before use, which presumably
 * expands the \uXXXX escapes.
 */
static const char* const compatTests[][3] = {
    /* Input*/                        /*Decomposed */                /*Composed*/
    { "cat",                        "cat",                        "cat"                        },

    { "\\uFB4f",                     "\\u05D0\\u05DC",               "\\u05D0\\u05DC"               }, /* Alef-Lamed vs. Alef, Lamed*/

    { "\\u00C4ffin",                 "A\\u0308ffin",                "\\u00C4ffin"                 },
    { "\\u00C4\\uFB03n",              "A\\u0308ffin",                "\\u00C4ffin"                 }, /* ffi ligature -> f + f + i*/

    { "Henry IV",                   "Henry IV",                   "Henry IV"                   },
    { "Henry \\u2163",               "Henry IV",                   "Henry IV"                   },

    { "\\u30AC",                     "\\u30AB\\u3099",               "\\u30AC"                     }, /* ga (Katakana)*/
    { "\\u30AB\\u3099",               "\\u30AB\\u3099",               "\\u30AC"                     }, /*ka + ten*/

    { "\\uFF76\\u3099",               "\\u30AB\\u3099",               "\\u30AC"                     }, /* hw_ka + ten*/

    /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    { "\\uFF76\\uFF9E",               "\\u30AB\\u3099",               "\\u30AC"                     }, /* hw_ka + hw_ten*/
    { "\\u30AB\\uFF9E",               "\\u30AB\\u3099",               "\\u30AC"                     }, /* ka + hw_ten*/
    { "", "", "" }  /* empty string: all three forms are empty */
};
128
/*
 * FCD test data, consumed by TestFCD() through TestNormCases(UNORM_FCD, ...).
 * Each row is { input, expected UNORM_FCD output, unused }. TestNormCases()
 * only reads column 2 for the NFC/NFKC modes, so it is left NULL here.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },  /* D-caron + cedilla */
    { "\\u010e", "\\u010e", NULL }                /* D-caron */
};
134
135 void addNormTest(TestNode** root);
136
addNormTest(TestNode ** root)137 void addNormTest(TestNode** root)
138 {
139 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 }
160
/*
 * Printable names for log messages, indexed directly by a UNormalizationMode
 * value (see modeStrings[mode] in TestNormCases()).
 * NOTE(review): the entries presumably mirror the enum's numeric order, with
 * "?" filling index 0 - confirm against unorm.h.
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
171
/*
 * Runs unorm_normalize() in one mode over a table of test cases.
 *
 * For every row the input (column 0) is normalized four times: preflighted
 * with an explicit length, preflighted as a NUL-terminated string, and then
 * written into a real buffer both ways. The two preflight lengths must agree,
 * each real call must return the preflighted length, and the output must
 * equal the expected string for the mode.
 *
 * mode:          normalization mode under test.
 * cases:         rows of { input, decomposed expectation, composed expectation }.
 * lengthOfCases: number of rows in cases.
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    /* Composing modes are checked against column 2, all others against column 1. */
    const int32_t expectedColumn = (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar result[16];
    int32_t row;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(row=0; row<lengthOfCases; ++row) {
        UErrorCode status = U_ZERO_ERROR;
        UErrorCode nulStatus = U_ZERO_ERROR;
        UChar *source = CharsToUChars(cases[row][0]);
        int32_t preflightLen;
        int32_t gotLen;

        /* Preflight twice: once with srcLength, once relying on the NUL terminator. */
        preflightLen = unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        gotLen = unorm_normalize(source, -1, mode, 0, NULL, 0, &nulStatus);
        if(preflightLen != gotLen) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)row, (int)preflightLen, (int)gotLen);
        }

        /* An overflow is the expected outcome of preflighting; clear it. */
        if(status==U_BUFFER_OVERFLOW_ERROR) {
            status = U_ZERO_ERROR;
        }

        /* Real normalization, explicit source length. */
        gotLen = unorm_normalize(source, u_strlen(source), mode, 0,
                                 result, UPRV_LENGTHOF(result), &status);
        if(U_SUCCESS(status) && preflightLen==gotLen) {
            assertEqual(result, cases[row][expectedColumn], row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* Real normalization, NUL-terminated source. */
        gotLen = unorm_normalize(source, -1, mode, 0,
                                 result, UPRV_LENGTHOF(result), &status);
        if(U_SUCCESS(status) && preflightLen==gotLen) {
            assertEqual(result, cases[row][expectedColumn], row);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
211
TestDecomp()212 void TestDecomp() {
213 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
214 }
215
TestCompatDecomp()216 void TestCompatDecomp() {
217 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
218 }
219
TestCanonDecompCompose()220 void TestCanonDecompCompose() {
221 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
222 }
223
TestCompatDecompCompose()224 void TestCompatDecompCompose() {
225 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
226 }
227
TestFCD()228 void TestFCD() {
229 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
230 }
231
/*
 * Logs an error when a normalization result differs from its expectation.
 *
 * result:   NUL-terminated UTF-16 output of the normalizer.
 * expected: expectation as a char string; converted with CharsToUChars()
 *           before the comparison.
 * index:    test case number, used only in the error message.
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedText = CharsToUChars(expected);
    const int difference = u_strcmp(result, expectedText);

    if(difference != 0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
                austrdup(result) );
    }

    /* The converted expectation is released with free(), as elsewhere in this file. */
    free(expectedText);
}
241
/*
 * Normalizes a string that contains an embedded U+0000 and compares the
 * output with the expected code units.
 *
 * src/srcLen: input code units and their count (may contain 0x0000).
 * exp/expLen: expected output code units and their count.
 * mode:       normalization mode to apply.
 * name:       printable mode name for log messages.
 *
 * If unorm_normalize() fails, the error is logged and nothing is compared:
 * the returned length may then exceed the output buffer. On a length mismatch
 * only the code units present in BOTH the result and the expectation are
 * compared, so neither array is read past its end. The final "OK" message is
 * logged only when length and contents both matched.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                           UChar *exp, int32_t expLen,
                           UNormalizationMode mode,
                           const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar result[50];
    int32_t len, compareLen, i;
    int lengthMatches = 1;

    /* Pre-fill with a sentinel so stale buffer contents cannot look like output. */
    for(i=0; i<UPRV_LENGTHOF(result); i++) {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        return;
    }
    if(len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, (int)expLen, (int)len);
        lengthMatches = 0;
    }

    /* Never index beyond the shorter of the two sequences. */
    compareLen = (len < expLen) ? len : expLen;
    for(i=0; i<compareLen; i++) {
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    (int)i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose("     %d: \\u%04X\n", (int)i, result[i]);
    }

    if(lengthMatches) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
284
TestNull()285 void TestNull()
286 {
287
288 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289 int32_t source_comp_len = 4;
290 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
291 int32_t expect_comp_len = 3;
292
293 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
294 int32_t source_dcmp_len = 3;
295 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296 int32_t expect_dcmp_len = 5;
297
298 TestNull_check(source_comp,
299 source_comp_len,
300 expect_comp,
301 expect_comp_len,
302 UNORM_NFC,
303 "UNORM_NFC");
304
305 TestNull_check(source_dcmp,
306 source_dcmp_len,
307 expect_dcmp,
308 expect_dcmp_len,
309 UNORM_NFD,
310 "UNORM_NFD");
311
312 TestNull_check(source_comp,
313 source_comp_len,
314 expect_comp,
315 expect_comp_len,
316 UNORM_NFKC,
317 "UNORM_NFKC");
318
319
320 }
321
TestQuickCheckResultNO()322 static void TestQuickCheckResultNO()
323 {
324 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332
333
334 const int SIZE = 10;
335
336 int count = 0;
337 UErrorCode error = U_ZERO_ERROR;
338
339 for (; count < SIZE; count ++)
340 {
341 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
342 UNORM_NO)
343 {
344 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
345 return;
346 }
347 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
348 UNORM_NO)
349 {
350 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
351 return;
352 }
353 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
354 UNORM_NO)
355 {
356 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
357 return;
358 }
359 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
360 UNORM_NO)
361 {
362 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
363 return;
364 }
365 }
366 }
367
368
TestQuickCheckResultYES()369 static void TestQuickCheckResultYES()
370 {
371 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379
380 const int SIZE = 10;
381 int count = 0;
382 UErrorCode error = U_ZERO_ERROR;
383
384 UChar cp = 0;
385 while (cp < 0xA0)
386 {
387 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
388 {
389 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
390 return;
391 }
392 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
393 UNORM_YES)
394 {
395 log_err("ERROR in NFC quick check at U+%04x\n", cp);
396 return;
397 }
398 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
399 {
400 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
401 return;
402 }
403 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
404 UNORM_YES)
405 {
406 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
407 return;
408 }
409 cp ++;
410 }
411
412 for (; count < SIZE; count ++)
413 {
414 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
415 UNORM_YES)
416 {
417 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
418 return;
419 }
420 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
421 != UNORM_YES)
422 {
423 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
424 return;
425 }
426 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
427 UNORM_YES)
428 {
429 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
430 return;
431 }
432 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
433 UNORM_YES)
434 {
435 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
436 return;
437 }
438 }
439 }
440
TestQuickCheckResultMAYBE()441 static void TestQuickCheckResultMAYBE()
442 {
443 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
447
448
449 const int SIZE = 10;
450
451 int count = 0;
452 UErrorCode error = U_ZERO_ERROR;
453
454 /* NFD and NFKD does not have any MAYBE codepoints */
455 for (; count < SIZE; count ++)
456 {
457 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
458 UNORM_MAYBE)
459 {
460 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
461 return;
462 }
463 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
464 UNORM_MAYBE)
465 {
466 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
467 return;
468 }
469 }
470 }
471
TestQuickCheckStringResult()472 static void TestQuickCheckStringResult()
473 {
474 int count;
475 UChar *d = NULL;
476 UChar *c = NULL;
477 UErrorCode error = U_ZERO_ERROR;
478
479 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
480 {
481 d = CharsToUChars(canonTests[count][1]);
482 c = CharsToUChars(canonTests[count][2]);
483 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
484 UNORM_YES)
485 {
486 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
487 return;
488 }
489
490 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
491 UNORM_NO)
492 {
493 log_err("ERROR in NFC quick check for string at count %d\n", count);
494 return;
495 }
496
497 free(d);
498 free(c);
499 }
500
501 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
502 {
503 d = CharsToUChars(compatTests[count][1]);
504 c = CharsToUChars(compatTests[count][2]);
505 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
506 UNORM_YES)
507 {
508 log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
509 return;
510 }
511
512 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
513 UNORM_YES)
514 {
515 log_err("ERROR in NFKC quick check for string at count %d\n", count);
516 return;
517 }
518
519 free(d);
520 free(c);
521 }
522 }
523
TestQuickCheck()524 void TestQuickCheck()
525 {
526 TestQuickCheckResultNO();
527 TestQuickCheckResultYES();
528 TestQuickCheckResultMAYBE();
529 TestQuickCheckStringResult();
530 }
531
532 /*
533 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
534 * normalized, and some that are not.
535 * Here we pick some specific cases and test the C API.
536 */
TestIsNormalized(void)537 static void TestIsNormalized(void) {
538 static const UChar notNFC[][8]={ /* strings that are not in NFC */
539 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
540 { 0xfb1d, 0 }, /* excluded from composition */
541 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
542 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
543 };
544 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
545 { 0x1100, 0x1161, 0 }, /* Jamo compose */
546 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
547 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
548 };
549
550 int32_t i;
551 UErrorCode errorCode;
552
553 /* API test */
554
555 /* normal case with length>=0 (length -1 used for special cases below) */
556 errorCode=U_ZERO_ERROR;
557 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
558 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
559 }
560
561 /* incoming U_FAILURE */
562 errorCode=U_TRUNCATED_CHAR_FOUND;
563 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
564 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
565 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
566 }
567
568 /* NULL source */
569 errorCode=U_ZERO_ERROR;
570 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
571 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
572 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
573 }
574
575 /* bad length */
576 errorCode=U_ZERO_ERROR;
577 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
578 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
579 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
580 }
581
582 /* specific cases */
583 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
584 errorCode=U_ZERO_ERROR;
585 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
586 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
587 }
588 errorCode=U_ZERO_ERROR;
589 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
590 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591 }
592 }
593 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
594 errorCode=U_ZERO_ERROR;
595 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
596 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
597 }
598 }
599 }
600
TestCheckFCD()601 void TestCheckFCD()
602 {
603 UErrorCode status = U_ZERO_ERROR;
604 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
605 0x0A};
606 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
607 0x02B9, 0x0314, 0x0315, 0x0316};
608 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
609 0x0050, 0x0730, 0x09EE, 0x1E10};
610
611 static const UChar datastr[][5] =
612 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
613 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
614 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
615 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
616 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
617
618 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
619 0x6a,
620 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
621 0xea,
622 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
623 0x0307, 0x0308, 0x0309, 0x030a,
624 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
625 0x0327, 0x0328, 0x0329, 0x032a,
626 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
627 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
628
629 int count = 0;
630
631 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
632 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
633 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
634 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
635 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
636 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637
638 if (U_FAILURE(status))
639 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
640
641 while (count < 4)
642 {
643 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
644 if (U_FAILURE(status)) {
645 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
646 break;
647 }
648 else {
649 if (result[count] != fcdresult) {
650 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
651 result[count]);
652 }
653 }
654 count ++;
655 }
656
657 /* random checks of long strings */
658 status = U_ZERO_ERROR;
659 srand((unsigned)time( NULL ));
660
661 for (count = 0; count < 50; count ++)
662 {
663 int size = 0;
664 UBool testresult = UNORM_YES;
665 UChar data[20];
666 UChar norm[100];
667 UChar nfd[100];
668 int normsize = 0;
669 int nfdsize = 0;
670
671 while (size != 19) {
672 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
673 log_verbose("0x%x", data[size]);
674 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
675 norm + normsize, 100 - normsize, &status);
676 if (U_FAILURE(status)) {
677 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
678 break;
679 }
680 size ++;
681 }
682 log_verbose("\n");
683
684 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
685 nfd, 100, &status);
686 if (U_FAILURE(status)) {
687 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
688 }
689
690 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
691 testresult = UNORM_NO;
692 }
693 if (testresult == UNORM_YES) {
694 log_verbose("result UNORM_YES\n");
695 }
696 else {
697 log_verbose("result UNORM_NO\n");
698 }
699
700 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
701 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
702 }
703 }
704 }
705
706 static void
TestAPI()707 TestAPI() {
708 static const UChar in[]={ 0x68, 0xe4 };
709 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
710 UErrorCode errorCode;
711 int32_t length;
712
713 /* try preflighting */
714 errorCode=U_ZERO_ERROR;
715 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
716 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
717 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
718 return;
719 }
720
721 errorCode=U_ZERO_ERROR;
722 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
723 if(U_FAILURE(errorCode)) {
724 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
725 return;
726 }
727 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
728 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
729 return;
730 }
731 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
732 if(U_FAILURE(errorCode)) {
733 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
734 return;
735 }
736 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
737 if(U_FAILURE(errorCode)) {
738 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
739 return;
740 }
741 }
742
743 /* test cases to improve test code coverage */
/*
 * Code points used by TestNormCoverage() to build its long input and the
 * expected outputs. The MUSICAL_* values lie above U+FFFF; TestNormCoverage()
 * stores them as surrogate pairs via U16_LEAD()/U16_TRAIL().
 */
enum {
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    HANGUL_AC00=0xac00,             /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
761
762 static void
TestNormCoverage()763 TestNormCoverage() {
764 UChar input[1000], expect[1000], output[1000];
765 UErrorCode errorCode;
766 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
767
768 /* create a long and nasty string with NFKC-unsafe characters */
769 inLength=0;
770
771 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
772 input[inLength++]=HANGUL_KIYEOK;
773 input[inLength++]=HANGUL_WEO;
774 input[inLength++]=HANGUL_KIYEOK_SIOS;
775
776 input[inLength++]=HANGUL_KIYEOK;
777 input[inLength++]=HANGUL_WEO;
778 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
779
780 input[inLength++]=HANGUL_KIYEOK;
781 input[inLength++]=HANGUL_K_WEO;
782 input[inLength++]=HANGUL_KIYEOK_SIOS;
783
784 input[inLength++]=HANGUL_KIYEOK;
785 input[inLength++]=HANGUL_K_WEO;
786 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
787
788 input[inLength++]=HANGUL_K_KIYEOK;
789 input[inLength++]=HANGUL_WEO;
790 input[inLength++]=HANGUL_KIYEOK_SIOS;
791
792 input[inLength++]=HANGUL_K_KIYEOK;
793 input[inLength++]=HANGUL_WEO;
794 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
795
796 input[inLength++]=HANGUL_K_KIYEOK;
797 input[inLength++]=HANGUL_K_WEO;
798 input[inLength++]=HANGUL_KIYEOK_SIOS;
799
800 input[inLength++]=HANGUL_K_KIYEOK;
801 input[inLength++]=HANGUL_K_WEO;
802 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
803
804 /* Hangul LV with normal/compatibility Jamo T */
805 input[inLength++]=HANGUL_AC00;
806 input[inLength++]=HANGUL_KIYEOK_SIOS;
807
808 input[inLength++]=HANGUL_AC00;
809 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
810
811 /* compatibility Jamo L, V */
812 input[inLength++]=HANGUL_K_KIYEOK;
813 input[inLength++]=HANGUL_K_WEO;
814
815 hangulPrefixLength=inLength;
816
817 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
818 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
819 for(i=0; i<200; ++i) {
820 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
821 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
822 input[inLength++]=U16_LEAD(MUSICAL_STEM);
823 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
824 }
825
826 /* (compatibility) Jamo L, T do not compose */
827 input[inLength++]=HANGUL_K_KIYEOK;
828 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
829
830 /* quick checks */
831 errorCode=U_ZERO_ERROR;
832 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
833 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
834 }
835 errorCode=U_ZERO_ERROR;
836 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
837 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
838 }
839 errorCode=U_ZERO_ERROR;
840 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
841 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
842 }
843 errorCode=U_ZERO_ERROR;
844 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
845 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
846 }
847 errorCode=U_ZERO_ERROR;
848 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
849 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
850 }
851
852 /* NFKC */
853 expectLength=0;
854 expect[expectLength++]=HANGUL_SYLLABLE;
855
856 expect[expectLength++]=HANGUL_SYLLABLE;
857
858 expect[expectLength++]=HANGUL_SYLLABLE;
859
860 expect[expectLength++]=HANGUL_SYLLABLE;
861
862 expect[expectLength++]=HANGUL_SYLLABLE;
863
864 expect[expectLength++]=HANGUL_SYLLABLE;
865
866 expect[expectLength++]=HANGUL_SYLLABLE;
867
868 expect[expectLength++]=HANGUL_SYLLABLE;
869
870 expect[expectLength++]=HANGUL_AC00+3;
871
872 expect[expectLength++]=HANGUL_AC00+3;
873
874 expect[expectLength++]=HANGUL_AC00+14*28;
875
876 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
877 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
878 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
879 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
880 for(i=0; i<200; ++i) {
881 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
882 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
883 }
884 for(i=0; i<200; ++i) {
885 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
886 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
887 }
888
889 expect[expectLength++]=HANGUL_KIYEOK;
890 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
891
892 /* try destination overflow first */
893 errorCode=U_ZERO_ERROR;
894 preflightLength=unorm_normalize(input, inLength,
895 UNORM_NFKC, 0,
896 output, 100, /* too short */
897 &errorCode);
898 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
899 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
900 }
901
902 /* real NFKC */
903 errorCode=U_ZERO_ERROR;
904 length=unorm_normalize(input, inLength,
905 UNORM_NFKC, 0,
906 output, UPRV_LENGTHOF(output),
907 &errorCode);
908 if(U_FAILURE(errorCode)) {
909 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
910 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
911 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
912 for(i=0; i<length; ++i) {
913 if(output[i]!=expect[i]) {
914 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
915 break;
916 }
917 }
918 }
919 if(length!=preflightLength) {
920 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
921 }
922
923 /* FCD */
924 u_memcpy(expect, input, hangulPrefixLength);
925 expectLength=hangulPrefixLength;
926
927 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
928 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
929 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
930 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
931 for(i=0; i<200; ++i) {
932 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
933 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
934 }
935 for(i=0; i<200; ++i) {
936 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
937 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
938 }
939
940 expect[expectLength++]=HANGUL_K_KIYEOK;
941 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
942
943 errorCode=U_ZERO_ERROR;
944 length=unorm_normalize(input, inLength,
945 UNORM_FCD, 0,
946 output, UPRV_LENGTHOF(output),
947 &errorCode);
948 if(U_FAILURE(errorCode)) {
949 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
950 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
951 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
952 for(i=0; i<length; ++i) {
953 if(output[i]!=expect[i]) {
954 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
955 break;
956 }
957 }
958 }
959 }
960
961 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
962 static void
TestConcatenate(void)963 TestConcatenate(void) {
964 /* "re + 'sume'" */
965 static const UChar
966 left[]={
967 0x72, 0x65, 0
968 },
969 right[]={
970 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
971 },
972 expect[]={
973 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
974 };
975
976 UChar buffer[100];
977 UErrorCode errorCode;
978 int32_t length;
979
980 /* left with length, right NUL-terminated */
981 errorCode=U_ZERO_ERROR;
982 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
983 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
984 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
985 }
986
987 /* preflighting */
988 errorCode=U_ZERO_ERROR;
989 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
990 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
991 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
992 }
993
994 buffer[2]=0x5555;
995 errorCode=U_ZERO_ERROR;
996 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
997 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
998 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
999 }
1000
1001 /* enter with U_FAILURE */
1002 buffer[2]=0xaaaa;
1003 errorCode=U_UNEXPECTED_TOKEN;
1004 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1005 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1006 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1007 }
1008
1009 /* illegal arguments */
1010 buffer[2]=0xaaaa;
1011 errorCode=U_ZERO_ERROR;
1012 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1013 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1014 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1015 }
1016
1017 errorCode=U_ZERO_ERROR;
1018 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1019 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1020 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1021 }
1022 }
1023
/* U+002B PLUS SIGN: separates the expected output pieces in the iteration test data. */
enum { _PLUS = 0x2b };
1027
1028 static const char *const _modeString[UNORM_MODE_COUNT]={
1029 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1030 };
1031
1032 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1033 _testIter(const UChar *src, int32_t srcLength,
1034 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1035 const UChar *out, int32_t outLength,
1036 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1037 UChar buffer[4];
1038 const UChar *expect, *outLimit, *in;
1039 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1040 UErrorCode errorCode;
1041 UBool neededToNormalize, expectNeeded;
1042
1043 errorCode=U_ZERO_ERROR;
1044 outLimit=out+outLength;
1045 if(forward) {
1046 expect=out;
1047 i=index=0;
1048 } else {
1049 expect=outLimit;
1050 i=srcIndexesLength-2;
1051 index=srcLength;
1052 }
1053
1054 for(;;) {
1055 prevIndex=index;
1056 if(forward) {
1057 if(!iter->hasNext(iter)) {
1058 return;
1059 }
1060 length=unorm_next(iter,
1061 buffer, UPRV_LENGTHOF(buffer),
1062 mode, 0,
1063 (UBool)(out!=NULL), &neededToNormalize,
1064 &errorCode);
1065 expectIndex=srcIndexes[i+1];
1066 in=src+prevIndex;
1067 inLength=expectIndex-prevIndex;
1068
1069 if(out!=NULL) {
1070 /* get output piece from between plus signs */
1071 expectLength=0;
1072 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1073 ++expectLength;
1074 }
1075 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1076 } else {
1077 expect=in;
1078 expectLength=inLength;
1079 expectNeeded=FALSE;
1080 }
1081 } else {
1082 if(!iter->hasPrevious(iter)) {
1083 return;
1084 }
1085 length=unorm_previous(iter,
1086 buffer, UPRV_LENGTHOF(buffer),
1087 mode, 0,
1088 (UBool)(out!=NULL), &neededToNormalize,
1089 &errorCode);
1090 expectIndex=srcIndexes[i];
1091 in=src+expectIndex;
1092 inLength=prevIndex-expectIndex;
1093
1094 if(out!=NULL) {
1095 /* get output piece from between plus signs */
1096 expectLength=0;
1097 while(expect!=out && expect[-1]!=_PLUS) {
1098 ++expectLength;
1099 --expect;
1100 }
1101 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1102 } else {
1103 expect=in;
1104 expectLength=inLength;
1105 expectNeeded=FALSE;
1106 }
1107 }
1108 index=iter->getIndex(iter, UITER_CURRENT);
1109
1110 if(U_FAILURE(errorCode)) {
1111 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1112 forward, _modeString[mode], i, u_errorName(errorCode));
1113 return;
1114 }
1115 if(expectIndex!=index) {
1116 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1117 forward, _modeString[mode], i, index, expectIndex);
1118 return;
1119 }
1120 if(expectLength!=length) {
1121 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1122 forward, _modeString[mode], i, length, expectLength);
1123 return;
1124 }
1125 if(0!=u_memcmp(expect, buffer, length)) {
1126 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1127 forward, _modeString[mode], i);
1128 return;
1129 }
1130 if(neededToNormalize!=expectNeeded) {
1131 }
1132
1133 if(forward) {
1134 expect+=expectLength+1; /* go after the + */
1135 ++i;
1136 } else {
1137 --expect; /* go before the + */
1138 --i;
1139 }
1140 }
1141 }
1142
1143 static void
TestNextPrevious()1144 TestNextPrevious() {
1145 static const UChar
1146 src[]={ /* input string */
1147 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1148 },
1149 nfd[]={ /* + separates expected output pieces */
1150 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1151 },
1152 nfkd[]={
1153 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1154 },
1155 nfc[]={
1156 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1157 },
1158 nfkc[]={
1159 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1160 },
1161 fcd[]={
1162 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1163 };
1164
1165 /* expected iterator indexes in the source string for each iteration piece */
1166 static const int32_t
1167 nfdIndexes[]={
1168 0, 1, 2, 5, 6, 7
1169 },
1170 nfkdIndexes[]={
1171 0, 1, 2, 5, 6, 7
1172 },
1173 nfcIndexes[]={
1174 0, 1, 2, 5, 6, 7
1175 },
1176 nfkcIndexes[]={
1177 0, 1, 2, 5, 7
1178 },
1179 fcdIndexes[]={
1180 0, 1, 2, 5, 6, 7
1181 };
1182
1183 UCharIterator iter;
1184
1185 UChar buffer[4];
1186 int32_t length;
1187
1188 UBool neededToNormalize;
1189 UErrorCode errorCode;
1190
1191 uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1192
1193 /* test iteration with doNormalize */
1194 iter.index=0;
1195 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1196 iter.index=0;
1197 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1198 iter.index=0;
1199 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1200 iter.index=0;
1201 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1202 iter.index=0;
1203 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1204
1205 iter.index=iter.length;
1206 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1207 iter.index=iter.length;
1208 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1209 iter.index=iter.length;
1210 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1211 iter.index=iter.length;
1212 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1213 iter.index=iter.length;
1214 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1215
1216 /* test iteration without doNormalize */
1217 iter.index=0;
1218 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1219 iter.index=0;
1220 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1221 iter.index=0;
1222 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1223 iter.index=0;
1224 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1225 iter.index=0;
1226 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1227
1228 iter.index=iter.length;
1229 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1230 iter.index=iter.length;
1231 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1232 iter.index=iter.length;
1233 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1234 iter.index=iter.length;
1235 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1236 iter.index=iter.length;
1237 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1238
1239 /* try without neededToNormalize */
1240 errorCode=U_ZERO_ERROR;
1241 buffer[0]=5;
1242 iter.index=1;
1243 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1244 UNORM_NFD, 0, TRUE, NULL,
1245 &errorCode);
1246 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1247 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1248 return;
1249 }
1250
1251 /* preflight */
1252 neededToNormalize=9;
1253 iter.index=1;
1254 length=unorm_next(&iter, NULL, 0,
1255 UNORM_NFD, 0, TRUE, &neededToNormalize,
1256 &errorCode);
1257 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1258 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1259 return;
1260 }
1261
1262 errorCode=U_ZERO_ERROR;
1263 buffer[0]=buffer[1]=5;
1264 neededToNormalize=9;
1265 iter.index=1;
1266 length=unorm_next(&iter, buffer, 1,
1267 UNORM_NFD, 0, TRUE, &neededToNormalize,
1268 &errorCode);
1269 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1270 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1271 return;
1272 }
1273
1274 /* no iterator */
1275 errorCode=U_ZERO_ERROR;
1276 buffer[0]=buffer[1]=5;
1277 neededToNormalize=9;
1278 iter.index=1;
1279 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1280 UNORM_NFD, 0, TRUE, &neededToNormalize,
1281 &errorCode);
1282 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1283 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1284 return;
1285 }
1286
1287 /* illegal mode */
1288 buffer[0]=buffer[1]=5;
1289 neededToNormalize=9;
1290 iter.index=1;
1291 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1292 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1293 &errorCode);
1294 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1295 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1296 return;
1297 }
1298
1299 /* error coming in */
1300 errorCode=U_MISPLACED_QUANTIFIER;
1301 buffer[0]=5;
1302 iter.index=1;
1303 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1304 UNORM_NFD, 0, TRUE, NULL,
1305 &errorCode);
1306 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1307 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1308 return;
1309 }
1310 }
1311
1312 static void
TestFCNFKCClosure(void)1313 TestFCNFKCClosure(void) {
1314 static const struct {
1315 UChar32 c;
1316 const UChar s[6];
1317 } tests[]={
1318 { 0x00C4, { 0 } },
1319 { 0x00E4, { 0 } },
1320 { 0x037A, { 0x0020, 0x03B9, 0 } },
1321 { 0x03D2, { 0x03C5, 0 } },
1322 { 0x20A8, { 0x0072, 0x0073, 0 } },
1323 { 0x210B, { 0x0068, 0 } },
1324 { 0x210C, { 0x0068, 0 } },
1325 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1326 { 0x2122, { 0x0074, 0x006D, 0 } },
1327 { 0x2128, { 0x007A, 0 } },
1328 { 0x1D5DB, { 0x0068, 0 } },
1329 { 0x1D5ED, { 0x007A, 0 } },
1330 { 0x0061, { 0 } }
1331 };
1332
1333 UChar buffer[8];
1334 UErrorCode errorCode;
1335 int32_t i, length;
1336
1337 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1338 errorCode=U_ZERO_ERROR;
1339 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1340 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1341 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1342 }
1343 }
1344
1345 /* error handling */
1346 errorCode=U_ZERO_ERROR;
1347 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1348 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1349 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1350 }
1351
1352 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1353 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1354 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1355 }
1356 }
1357
1358 static void
TestQuickCheckPerCP()1359 TestQuickCheckPerCP() {
1360 UErrorCode errorCode;
1361 UChar32 c, lead, trail;
1362 UChar s[U16_MAX_LENGTH], nfd[16];
1363 int32_t length, lccc1, lccc2, tccc1, tccc2;
1364 int32_t qc1, qc2;
1365
1366 if(
1367 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1369 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1371 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1372 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1373 ) {
1374 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1375 }
1376
1377 /*
1378 * compare the quick check property values for some code points
1379 * to the quick check results for checking same-code point strings
1380 */
1381 errorCode=U_ZERO_ERROR;
1382 c=0;
1383 while(c<0x110000) {
1384 length=0;
1385 U16_APPEND_UNSAFE(s, length, c);
1386
1387 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1388 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1389 if(qc1!=qc2) {
1390 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1391 }
1392
1393 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1394 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1395 if(qc1!=qc2) {
1396 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397 }
1398
1399 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1400 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1401 if(qc1!=qc2) {
1402 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403 }
1404
1405 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1406 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1407 if(qc1!=qc2) {
1408 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1409 }
1410
1411 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1412 /* length-length == 0 is used to get around a compiler warning. */
1413 U16_GET(nfd, 0, length-length, length, lead);
1414 U16_GET(nfd, 0, length-1, length, trail);
1415
1416 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1417 lccc2=u_getCombiningClass(lead);
1418 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1419 tccc2=u_getCombiningClass(trail);
1420
1421 if(lccc1!=lccc2) {
1422 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1423 lccc1, lccc2, c);
1424 }
1425 if(tccc1!=tccc2) {
1426 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1427 tccc1, tccc2, c);
1428 }
1429
1430 /* skip some code points */
1431 c=(20*c)/19+1;
1432 }
1433 }
1434
1435 static void
TestComposition(void)1436 TestComposition(void) {
1437 static const struct {
1438 UNormalizationMode mode;
1439 uint32_t options;
1440 UChar input[12];
1441 UChar expect[12];
1442 } cases[]={
1443 /*
1444 * special cases for UAX #15 bug
1445 * see Unicode Corrigendum #5: Normalization Idempotency
1446 * at http://unicode.org/versions/corrigendum5.html
1447 * (was Public Review Issue #29)
1448 */
1449 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1450 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1451 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1452 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1453
1454 /* TODO: add test cases for UNORM_FCC here (j2151) */
1455 };
1456
1457 UChar output[16];
1458 UErrorCode errorCode;
1459 int32_t i, length;
1460
1461 for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1462 errorCode=U_ZERO_ERROR;
1463 length=unorm_normalize(
1464 cases[i].input, -1,
1465 cases[i].mode, cases[i].options,
1466 output, UPRV_LENGTHOF(output),
1467 &errorCode);
1468 if( U_FAILURE(errorCode) ||
1469 length!=u_strlen(cases[i].expect) ||
1470 0!=u_memcmp(output, cases[i].expect, length)
1471 ) {
1472 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1473 }
1474 }
1475 }
1476
1477 static void
TestGetDecomposition()1478 TestGetDecomposition() {
1479 UChar decomp[32];
1480 int32_t length;
1481
1482 UErrorCode errorCode=U_ZERO_ERROR;
1483 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1484 if(U_FAILURE(errorCode)) {
1485 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1486 return;
1487 }
1488
1489 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1490 if(U_FAILURE(errorCode) || length>=0) {
1491 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1492 }
1493 errorCode=U_ZERO_ERROR;
1494 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1495 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1496 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1497 }
1498 errorCode=U_ZERO_ERROR;
1499 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1500 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1501 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1502 }
1503 errorCode=U_ZERO_ERROR;
1504 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1505 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1506 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1507 }
1508 errorCode=U_ZERO_ERROR;
1509 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1510 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1511 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1512 }
1513 errorCode=U_ZERO_ERROR;
1514 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1515 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1516 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1517 }
1518 }
1519
1520 static void
TestGetRawDecomposition()1521 TestGetRawDecomposition() {
1522 UChar decomp[32];
1523 int32_t length;
1524
1525 UErrorCode errorCode=U_ZERO_ERROR;
1526 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1527 if(U_FAILURE(errorCode)) {
1528 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1529 return;
1530 }
1531 /*
1532 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1533 * without recursive decomposition.
1534 */
1535
1536 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1537 if(U_FAILURE(errorCode) || length>=0) {
1538 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1539 }
1540 errorCode=U_ZERO_ERROR;
1541 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1542 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1543 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1544 }
1545 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1546 errorCode=U_ZERO_ERROR;
1547 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1548 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1549 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1550 }
1551 /* U+212B ANGSTROM SIGN */
1552 errorCode=U_ZERO_ERROR;
1553 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1554 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1555 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1556 }
1557 errorCode=U_ZERO_ERROR;
1558 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1559 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1560 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1561 }
1562 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1563 errorCode=U_ZERO_ERROR;
1564 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1565 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1566 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1567 }
1568 errorCode=U_ZERO_ERROR;
1569 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1570 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1571 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1572 }
1573 errorCode=U_ZERO_ERROR;
1574 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1575 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1576 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1577 }
1578 errorCode=U_ZERO_ERROR;
1579 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1580 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1581 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1582 }
1583 }
1584
1585 static void
TestAppendRestoreMiddle()1586 TestAppendRestoreMiddle() {
1587 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1588 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1589 /* NFC: C5 is 'A with ring above' */
1590 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1591 int32_t length;
1592 UErrorCode errorCode=U_ZERO_ERROR;
1593 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1594 if(U_FAILURE(errorCode)) {
1595 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1596 return;
1597 }
1598 /*
1599 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1600 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1601 * still fits into a[] but the full result still overflows this capacity.
1602 * (Let it modify the destination buffer before reallocating internally.)
1603 */
1604 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1605 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1606 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1607 return;
1608 }
1609 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1610 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1611 log_err("unorm2_append(overflow) modified the first string\n");
1612 return;
1613 }
1614 errorCode=U_ZERO_ERROR;
1615 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1616 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1617 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1618 return;
1619 }
1620 }
1621
1622 static void
TestGetEasyToUseInstance()1623 TestGetEasyToUseInstance() {
1624 static const UChar in[]={
1625 0xA0, /* -> <noBreak> 0020 */
1626 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1627 };
1628 UChar out[32];
1629 int32_t length;
1630
1631 UErrorCode errorCode=U_ZERO_ERROR;
1632 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1633 if(U_FAILURE(errorCode)) {
1634 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1635 return;
1636 }
1637 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1638 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1639 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1640 (int)length, u_errorName(errorCode));
1641 }
1642
1643 errorCode=U_ZERO_ERROR;
1644 n2=unorm2_getNFDInstance(&errorCode);
1645 if(U_FAILURE(errorCode)) {
1646 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1647 return;
1648 }
1649 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1650 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1651 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1652 (int)length, u_errorName(errorCode));
1653 }
1654
1655 errorCode=U_ZERO_ERROR;
1656 n2=unorm2_getNFKCInstance(&errorCode);
1657 if(U_FAILURE(errorCode)) {
1658 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1659 return;
1660 }
1661 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1662 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1663 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1664 (int)length, u_errorName(errorCode));
1665 }
1666
1667 errorCode=U_ZERO_ERROR;
1668 n2=unorm2_getNFKDInstance(&errorCode);
1669 if(U_FAILURE(errorCode)) {
1670 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1671 return;
1672 }
1673 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1674 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1675 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1676 (int)length, u_errorName(errorCode));
1677 }
1678
1679 errorCode=U_ZERO_ERROR;
1680 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1681 if(U_FAILURE(errorCode)) {
1682 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1683 return;
1684 }
1685 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1686 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1687 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1688 (int)length, u_errorName(errorCode));
1689 }
1690 }
1691
1692 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1693