1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #if !UCONFIG_NO_NORMALIZATION
26
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33
34 static void
35 TestAPI(void);
36
37 static void
38 TestNormCoverage(void);
39
40 static void
41 TestConcatenate(void);
42
43 static void
44 TestNextPrevious(void);
45
46 static void TestIsNormalized(void);
47
48 static void
49 TestFCNFKCClosure(void);
50
51 static void
52 TestQuickCheckPerCP(void);
53
54 static void
55 TestComposition(void);
56
57 static void
58 TestFCD(void);
59
60 static void
61 TestGetDecomposition(void);
62
63 static void
64 TestGetRawDecomposition(void);
65
66 static void TestAppendRestoreMiddle(void);
67 static void TestGetEasyToUseInstance(void);
68
/*
 * Canonical normalization test vectors.
 *
 * Every row holds three escaped strings (turned into UChar strings with
 * CharsToUChars() by the tests in this file):
 *   [0] the input,
 *   [1] the expected canonical decomposition,
 *   [2] the expected canonical composition.
 * The row comments describe the INPUT column.
 */
static const char* const canonTests[][3] = {
    /* Input */                  /* Decomposed */              /* Composed */
    { "cat",                     "cat",                        "cat"                    },
    { "\\u00e0ardvark",          "a\\u0300ardvark",            "\\u00e0ardvark"         }, /* a-grave + "ardvark" */

    { "\\u1e0a",                 "D\\u0307",                   "\\u1e0a"                }, /* D-dot_above */
    { "D\\u0307",                "D\\u0307",                   "\\u1e0a"                }, /* D dot_above */

    { "\\u1e0c\\u0307",          "D\\u0323\\u0307",            "\\u1e0c\\u0307"         }, /* D-dot_below dot_above */
    { "\\u1e0a\\u0323",          "D\\u0323\\u0307",            "\\u1e0c\\u0307"         }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323",         "D\\u0323\\u0307",            "\\u1e0c\\u0307"         }, /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323",   "D\\u0327\\u0323\\u0307",     "\\u1e10\\u0323\\u0307"  }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323",  "D\\u0328\\u0323\\u0307",     "\\u1e0c\\u0328\\u0307"  }, /* D dot_above ogonek dot_below */

    { "\\u1E14",                 "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron-grave */
    { "\\u0112\\u0300",          "E\\u0304\\u0300",            "\\u1E14"                }, /* E-macron + grave */
    { "\\u00c8\\u0304",          "E\\u0300\\u0304",            "\\u00c8\\u0304"         }, /* E-grave + macron */

    { "\\u212b",                 "A\\u030a",                   "\\u00c5"                }, /* angstrom_sign */
    { "\\u00c5",                 "A\\u030a",                   "\\u00c5"                }, /* A-ring */

    { "\\u00C4ffin",             "A\\u0308ffin",               "\\u00C4ffin"            },
    { "\\u00C4\\uFB03n",         "A\\u0308\\uFB03n",           "\\u00C4\\uFB03n"        },

    { "Henry IV",                "Henry IV",                   "Henry IV"               },
    { "Henry \\u2163",           "Henry \\u2163",              "Henry \\u2163"          },

    { "\\u30AC",                 "\\u30AB\\u3099",             "\\u30AC"                }, /* ga (Katakana) */
    { "\\u30AB\\u3099",          "\\u30AB\\u3099",             "\\u30AC"                }, /* ka + ten */
    { "\\uFF76\\uFF9E",          "\\uFF76\\uFF9E",             "\\uFF76\\uFF9E"         }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",          "\\u30AB\\uFF9E",             "\\u30AB\\uFF9E"         }, /* ka + hw_ten */
    { "\\uFF76\\u3099",          "\\uFF76\\u3099",             "\\uFF76\\u3099"         }, /* hw_ka + ten */
    { "A\\u0300\\u0316",         "A\\u0316\\u0300",            "\\u00C0\\u0316"         }, /* A grave grave_below */
    { "",                        "",                           ""                       }  /* empty string */
};
105
/*
 * Compatibility normalization test vectors.
 *
 * Column layout per row (escaped strings, expanded with CharsToUChars()):
 *   [0] input, [1] expected compatibility decomposition,
 *   [2] expected compatibility composition.
 */
static const char* const compatTests[][3] = {
    /* plain ASCII is untouched */
    { "cat", "cat", "cat" },

    /* Alef-Lamed ligature vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* The next two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later. */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    /* empty string */
    { "", "", "" }
};
128
/*
 * FCD test vectors: [0] input, [1] expected result; column [2] is unused (NULL).
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134
135 void addNormTest(TestNode** root);
136
/*
 * Registers every normalization test of this file in the test tree `root`.
 * Each test is added under the path "tsnorm/cnormtst/<function name>",
 * in the order listed below.
 */
void addNormTest(TestNode** root)
{
/* Builds the registration path from the function name itself. */
#define ADD_NORM_TEST(fn) addTest(root, &fn, "tsnorm/cnormtst/" #fn)

    ADD_NORM_TEST(TestAPI);
    ADD_NORM_TEST(TestDecomp);
    ADD_NORM_TEST(TestCompatDecomp);
    ADD_NORM_TEST(TestCanonDecompCompose);
    ADD_NORM_TEST(TestCompatDecompCompose);
    ADD_NORM_TEST(TestFCD);
    ADD_NORM_TEST(TestNull);
    ADD_NORM_TEST(TestQuickCheck);
    ADD_NORM_TEST(TestQuickCheckPerCP);
    ADD_NORM_TEST(TestIsNormalized);
    ADD_NORM_TEST(TestCheckFCD);
    ADD_NORM_TEST(TestNormCoverage);
    ADD_NORM_TEST(TestConcatenate);
    ADD_NORM_TEST(TestNextPrevious);
    ADD_NORM_TEST(TestFCNFKCClosure);
    ADD_NORM_TEST(TestComposition);
    ADD_NORM_TEST(TestGetDecomposition);
    ADD_NORM_TEST(TestGetRawDecomposition);
    ADD_NORM_TEST(TestAppendRestoreMiddle);
    ADD_NORM_TEST(TestGetEasyToUseInstance);

#undef ADD_NORM_TEST
}
160
/*
 * Printable names of the normalization modes, indexed directly by a
 * UNormalizationMode value (see modeStrings[mode] in the tests below).
 *
 * The enumeration starts at 1 (UNORM_NONE), so index 0 is a placeholder;
 * without it every name is shifted by one and e.g. UNORM_NFD would be
 * reported as "UNORM_NFKD".
 */
static const char* const modeStrings[]={
    "?",                /* 0: no such mode */
    "UNORM_NONE",       /* 1 */
    "UNORM_NFD",        /* 2 */
    "UNORM_NFKD",       /* 3 */
    "UNORM_NFC",        /* 4 */
    "UNORM_NFKC",       /* 5 */
    "UNORM_FCD",        /* 6 */
    "UNORM_MODE_COUNT"  /* 7 */
};
170
/*
 * Runs unorm_normalize() in `mode` over every row of `cases` and compares the
 * output with the expected column of that row.
 *
 * @param mode          normalization mode under test
 * @param cases         rows of { input, decomposed, composed } escaped strings
 * @param lengthOfCases number of rows in `cases`
 *
 * Each input is normalized twice: once with an explicit source length
 * ("srcLength") and once as a NUL-terminated string with length -1 ("NUL").
 * Both variants must agree with the preflighted length and with the
 * expected string.
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    int32_t x, neededLen, length2;
    /* Composing modes are checked against column 2, all other modes against column 1. */
    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar *source=NULL;
    UChar result[16];
    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
    for(x=0; x < lengthOfCases; x++)
    {
        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
        source=CharsToUChars(cases[x][0]);

        /* Preflight with an explicit length and with NUL termination; both must agree. */
        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen!=length2) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }
        /* An overflow is the expected outcome of preflighting; any other failure is kept. */
        if(status==U_BUFFER_OVERFLOW_ERROR)
        {
            status=U_ZERO_ERROR;
        }

        /* Variant 1: explicit source length. */
        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }

        /* Variant 2: NUL-terminated source; start from a clean error code so
         * that this variant is judged on its own. */
        status=U_ZERO_ERROR;
        length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
        if(U_FAILURE(status) || neededLen!=length2) {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        } else {
            assertEqual(result, cases[x][expIndex], x);
        }
        free(source);
    }
}
210
TestDecomp()211 void TestDecomp() {
212 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
213 }
214
TestCompatDecomp()215 void TestCompatDecomp() {
216 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
217 }
218
TestCanonDecompCompose()219 void TestCanonDecompCompose() {
220 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
221 }
222
TestCompatDecompCompose()223 void TestCompatDecompCompose() {
224 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
225 }
226
TestFCD()227 void TestFCD() {
228 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
229 }
230
/*
 * Logs an error if the NUL-terminated `result` differs from `expected`.
 *
 * @param result   normalization output to check
 * @param expected escaped string; converted with CharsToUChars() before comparing
 * @param index    test case number, only used in the error message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *wanted = CharsToUChars(expected);
    const int difference = u_strcmp(result, wanted);

    if(difference != 0) {
        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n",
                index, expected, austrdup(result));
    }

    /* CharsToUChars() hands back a buffer that this function releases. */
    free(wanted);
}
240
/*
 * Normalizes `src` (which may contain embedded U+0000) and compares the
 * output with `exp`.
 *
 * @param src    source text
 * @param srcLen number of UChars in `src`
 * @param exp    expected output
 * @param expLen number of UChars in `exp`
 * @param mode   normalization mode to apply
 * @param name   printable name of `mode`, used in log messages
 *
 * The comparison never reads beyond `expLen` elements of `exp`, and the
 * final "OK" message is only logged when nothing went wrong.
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                           UChar *exp, int32_t expLen,
                           UNormalizationMode mode,
                           const char *name)
{
    enum { RESULT_CAPACITY = 50 };

    UErrorCode status = U_ZERO_ERROR;
    int32_t len, i, compareLen;
    int lengthMatches = 1;
    UChar result[RESULT_CAPACITY];

    /* Pre-fill with U+FFFD so that output never written by the call stands out. */
    for(i=0;i<RESULT_CAPACITY;i++)
    {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, RESULT_CAPACITY, &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        /* Neither len nor result[] can be trusted after a failure. */
        return;
    }
    if (len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
        lengthMatches = 0;
    }

    /* Compare only the part that exists in both buffers. */
    compareLen = (len < expLen) ? len : expLen;
    for(i=0;i<compareLen;i++){
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose(" %d: \\u%04X\n", i, result[i]);
    }

    if(lengthMatches) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
283
TestNull()284 void TestNull()
285 {
286
287 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
288 int32_t source_comp_len = 4;
289 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
290 int32_t expect_comp_len = 3;
291
292 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
293 int32_t source_dcmp_len = 3;
294 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
295 int32_t expect_dcmp_len = 5;
296
297 TestNull_check(source_comp,
298 source_comp_len,
299 expect_comp,
300 expect_comp_len,
301 UNORM_NFC,
302 "UNORM_NFC");
303
304 TestNull_check(source_dcmp,
305 source_dcmp_len,
306 expect_dcmp,
307 expect_dcmp_len,
308 UNORM_NFD,
309 "UNORM_NFD");
310
311 TestNull_check(source_comp,
312 source_comp_len,
313 expect_comp,
314 expect_comp_len,
315 UNORM_NFKC,
316 "UNORM_NFKC");
317
318
319 }
320
TestQuickCheckResultNO()321 static void TestQuickCheckResultNO()
322 {
323 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
324 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
325 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
326 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
327 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
328 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
329 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
330 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
331
332
333 const int SIZE = 10;
334
335 int count = 0;
336 UErrorCode error = U_ZERO_ERROR;
337
338 for (; count < SIZE; count ++)
339 {
340 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
341 UNORM_NO)
342 {
343 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
344 return;
345 }
346 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
347 UNORM_NO)
348 {
349 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
350 return;
351 }
352 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
353 UNORM_NO)
354 {
355 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
356 return;
357 }
358 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
359 UNORM_NO)
360 {
361 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
362 return;
363 }
364 }
365 }
366
367
TestQuickCheckResultYES()368 static void TestQuickCheckResultYES()
369 {
370 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
371 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
372 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
373 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
374 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
375 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
376 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
377 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
378
379 const int SIZE = 10;
380 int count = 0;
381 UErrorCode error = U_ZERO_ERROR;
382
383 UChar cp = 0;
384 while (cp < 0xA0)
385 {
386 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
387 {
388 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
389 return;
390 }
391 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
392 UNORM_YES)
393 {
394 log_err("ERROR in NFC quick check at U+%04x\n", cp);
395 return;
396 }
397 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
398 {
399 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
400 return;
401 }
402 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
403 UNORM_YES)
404 {
405 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
406 return;
407 }
408 cp ++;
409 }
410
411 for (; count < SIZE; count ++)
412 {
413 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
414 UNORM_YES)
415 {
416 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
417 return;
418 }
419 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
420 != UNORM_YES)
421 {
422 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
423 return;
424 }
425 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
426 UNORM_YES)
427 {
428 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
429 return;
430 }
431 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
432 UNORM_YES)
433 {
434 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
435 return;
436 }
437 }
438 }
439
TestQuickCheckResultMAYBE()440 static void TestQuickCheckResultMAYBE()
441 {
442 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
443 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
444 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
445 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
446
447
448 const int SIZE = 10;
449
450 int count = 0;
451 UErrorCode error = U_ZERO_ERROR;
452
453 /* NFD and NFKD does not have any MAYBE codepoints */
454 for (; count < SIZE; count ++)
455 {
456 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
457 UNORM_MAYBE)
458 {
459 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
460 return;
461 }
462 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
463 UNORM_MAYBE)
464 {
465 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
466 return;
467 }
468 }
469 }
470
TestQuickCheckStringResult()471 static void TestQuickCheckStringResult()
472 {
473 int count;
474 UChar *d = NULL;
475 UChar *c = NULL;
476 UErrorCode error = U_ZERO_ERROR;
477
478 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
479 {
480 d = CharsToUChars(canonTests[count][1]);
481 c = CharsToUChars(canonTests[count][2]);
482 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
483 UNORM_YES)
484 {
485 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
486 return;
487 }
488
489 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
490 UNORM_NO)
491 {
492 log_err("ERROR in NFC quick check for string at count %d\n", count);
493 return;
494 }
495
496 free(d);
497 free(c);
498 }
499
500 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
501 {
502 d = CharsToUChars(compatTests[count][1]);
503 c = CharsToUChars(compatTests[count][2]);
504 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
505 UNORM_YES)
506 {
507 log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
508 return;
509 }
510
511 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
512 UNORM_YES)
513 {
514 log_err("ERROR in NFKC quick check for string at count %d\n", count);
515 return;
516 }
517
518 free(d);
519 free(c);
520 }
521 }
522
/*
 * Entry point for the quick-check tests: runs the per-code-point NO, YES
 * and MAYBE checks followed by the whole-string checks.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
530
531 /*
532 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
533 * normalized, and some that are not.
534 * Here we pick some specific cases and test the C API.
535 */
TestIsNormalized(void)536 static void TestIsNormalized(void) {
537 static const UChar notNFC[][8]={ /* strings that are not in NFC */
538 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
539 { 0xfb1d, 0 }, /* excluded from composition */
540 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
541 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
542 };
543 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
544 { 0x1100, 0x1161, 0 }, /* Jamo compose */
545 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
546 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
547 };
548
549 int32_t i;
550 UErrorCode errorCode;
551
552 /* API test */
553
554 /* normal case with length>=0 (length -1 used for special cases below) */
555 errorCode=U_ZERO_ERROR;
556 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
557 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
558 }
559
560 /* incoming U_FAILURE */
561 errorCode=U_TRUNCATED_CHAR_FOUND;
562 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
563 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
564 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
565 }
566
567 /* NULL source */
568 errorCode=U_ZERO_ERROR;
569 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
570 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
571 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
572 }
573
574 /* bad length */
575 errorCode=U_ZERO_ERROR;
576 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
578 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
579 }
580
581 /* specific cases */
582 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
583 errorCode=U_ZERO_ERROR;
584 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
585 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
586 }
587 errorCode=U_ZERO_ERROR;
588 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
589 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
590 }
591 }
592 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
593 errorCode=U_ZERO_ERROR;
594 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
595 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
596 }
597 }
598 }
599
TestCheckFCD()600 void TestCheckFCD()
601 {
602 UErrorCode status = U_ZERO_ERROR;
603 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
604 0x0A};
605 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
606 0x02B9, 0x0314, 0x0315, 0x0316};
607 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
608 0x0050, 0x0730, 0x09EE, 0x1E10};
609
610 static const UChar datastr[][5] =
611 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
612 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
613 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
614 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
615 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
616
617 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
618 0x6a,
619 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
620 0xea,
621 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
622 0x0307, 0x0308, 0x0309, 0x030a,
623 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
624 0x0327, 0x0328, 0x0329, 0x032a,
625 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
626 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
627
628 int count = 0;
629
630 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
631 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
632 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
633 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
634 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
635 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
636
637 if (U_FAILURE(status))
638 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
639
640 while (count < 4)
641 {
642 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
643 if (U_FAILURE(status)) {
644 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
645 break;
646 }
647 else {
648 if (result[count] != fcdresult) {
649 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
650 result[count]);
651 }
652 }
653 count ++;
654 }
655
656 /* random checks of long strings */
657 status = U_ZERO_ERROR;
658 srand((unsigned)time( NULL ));
659
660 for (count = 0; count < 50; count ++)
661 {
662 int size = 0;
663 UBool testresult = UNORM_YES;
664 UChar data[20];
665 UChar norm[100];
666 UChar nfd[100];
667 int normsize = 0;
668 int nfdsize = 0;
669
670 while (size != 19) {
671 data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
672 log_verbose("0x%x", data[size]);
673 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
674 norm + normsize, 100 - normsize, &status);
675 if (U_FAILURE(status)) {
676 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
677 break;
678 }
679 size ++;
680 }
681 log_verbose("\n");
682
683 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
684 nfd, 100, &status);
685 if (U_FAILURE(status)) {
686 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
687 }
688
689 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
690 testresult = UNORM_NO;
691 }
692 if (testresult == UNORM_YES) {
693 log_verbose("result UNORM_YES\n");
694 }
695 else {
696 log_verbose("result UNORM_NO\n");
697 }
698
699 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
700 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
701 }
702 }
703 }
704
705 static void
TestAPI()706 TestAPI() {
707 static const UChar in[]={ 0x68, 0xe4 };
708 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
709 UErrorCode errorCode;
710 int32_t length;
711
712 /* try preflighting */
713 errorCode=U_ZERO_ERROR;
714 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
715 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
716 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
717 return;
718 }
719
720 errorCode=U_ZERO_ERROR;
721 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
722 if(U_FAILURE(errorCode)) {
723 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
724 return;
725 }
726 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
727 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
728 return;
729 }
730 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
731 if(U_FAILURE(errorCode)) {
732 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
733 return;
734 }
735 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
736 if(U_FAILURE(errorCode)) {
737 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
738 return;
739 }
740 }
741
742 /* test cases to improve test code coverage */
/* Code points used by the coverage test below. */
enum {
    /* compatibility Jamo; each maps to a conjoining Jamo under NFKD */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed syllables */
    HANGUL_AC00          = 0xac00,                      /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = HANGUL_AC00 + 14 * 28 + 3,   /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c     /* cc=220 */
};
760
761 static void
TestNormCoverage()762 TestNormCoverage() {
763 UChar input[1000], expect[1000], output[1000];
764 UErrorCode errorCode;
765 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
766
767 /* create a long and nasty string with NFKC-unsafe characters */
768 inLength=0;
769
770 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
771 input[inLength++]=HANGUL_KIYEOK;
772 input[inLength++]=HANGUL_WEO;
773 input[inLength++]=HANGUL_KIYEOK_SIOS;
774
775 input[inLength++]=HANGUL_KIYEOK;
776 input[inLength++]=HANGUL_WEO;
777 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
778
779 input[inLength++]=HANGUL_KIYEOK;
780 input[inLength++]=HANGUL_K_WEO;
781 input[inLength++]=HANGUL_KIYEOK_SIOS;
782
783 input[inLength++]=HANGUL_KIYEOK;
784 input[inLength++]=HANGUL_K_WEO;
785 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
786
787 input[inLength++]=HANGUL_K_KIYEOK;
788 input[inLength++]=HANGUL_WEO;
789 input[inLength++]=HANGUL_KIYEOK_SIOS;
790
791 input[inLength++]=HANGUL_K_KIYEOK;
792 input[inLength++]=HANGUL_WEO;
793 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
794
795 input[inLength++]=HANGUL_K_KIYEOK;
796 input[inLength++]=HANGUL_K_WEO;
797 input[inLength++]=HANGUL_KIYEOK_SIOS;
798
799 input[inLength++]=HANGUL_K_KIYEOK;
800 input[inLength++]=HANGUL_K_WEO;
801 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
802
803 /* Hangul LV with normal/compatibility Jamo T */
804 input[inLength++]=HANGUL_AC00;
805 input[inLength++]=HANGUL_KIYEOK_SIOS;
806
807 input[inLength++]=HANGUL_AC00;
808 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
809
810 /* compatibility Jamo L, V */
811 input[inLength++]=HANGUL_K_KIYEOK;
812 input[inLength++]=HANGUL_K_WEO;
813
814 hangulPrefixLength=inLength;
815
816 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
817 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
818 for(i=0; i<200; ++i) {
819 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
820 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
821 input[inLength++]=U16_LEAD(MUSICAL_STEM);
822 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
823 }
824
825 /* (compatibility) Jamo L, T do not compose */
826 input[inLength++]=HANGUL_K_KIYEOK;
827 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
828
829 /* quick checks */
830 errorCode=U_ZERO_ERROR;
831 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
832 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
833 }
834 errorCode=U_ZERO_ERROR;
835 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
836 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
837 }
838 errorCode=U_ZERO_ERROR;
839 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
840 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
841 }
842 errorCode=U_ZERO_ERROR;
843 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
844 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
845 }
846 errorCode=U_ZERO_ERROR;
847 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
848 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
849 }
850
851 /* NFKC */
852 expectLength=0;
853 expect[expectLength++]=HANGUL_SYLLABLE;
854
855 expect[expectLength++]=HANGUL_SYLLABLE;
856
857 expect[expectLength++]=HANGUL_SYLLABLE;
858
859 expect[expectLength++]=HANGUL_SYLLABLE;
860
861 expect[expectLength++]=HANGUL_SYLLABLE;
862
863 expect[expectLength++]=HANGUL_SYLLABLE;
864
865 expect[expectLength++]=HANGUL_SYLLABLE;
866
867 expect[expectLength++]=HANGUL_SYLLABLE;
868
869 expect[expectLength++]=HANGUL_AC00+3;
870
871 expect[expectLength++]=HANGUL_AC00+3;
872
873 expect[expectLength++]=HANGUL_AC00+14*28;
874
875 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
876 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
877 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
878 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
879 for(i=0; i<200; ++i) {
880 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
881 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
882 }
883 for(i=0; i<200; ++i) {
884 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
885 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
886 }
887
888 expect[expectLength++]=HANGUL_KIYEOK;
889 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
890
891 /* try destination overflow first */
892 errorCode=U_ZERO_ERROR;
893 preflightLength=unorm_normalize(input, inLength,
894 UNORM_NFKC, 0,
895 output, 100, /* too short */
896 &errorCode);
897 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
898 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
899 }
900
901 /* real NFKC */
902 errorCode=U_ZERO_ERROR;
903 length=unorm_normalize(input, inLength,
904 UNORM_NFKC, 0,
905 output, UPRV_LENGTHOF(output),
906 &errorCode);
907 if(U_FAILURE(errorCode)) {
908 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
909 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
910 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
911 for(i=0; i<length; ++i) {
912 if(output[i]!=expect[i]) {
913 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
914 break;
915 }
916 }
917 }
918 if(length!=preflightLength) {
919 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
920 }
921
922 /* FCD */
923 u_memcpy(expect, input, hangulPrefixLength);
924 expectLength=hangulPrefixLength;
925
926 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
927 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
928 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
929 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
930 for(i=0; i<200; ++i) {
931 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
932 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
933 }
934 for(i=0; i<200; ++i) {
935 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
936 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
937 }
938
939 expect[expectLength++]=HANGUL_K_KIYEOK;
940 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
941
942 errorCode=U_ZERO_ERROR;
943 length=unorm_normalize(input, inLength,
944 UNORM_FCD, 0,
945 output, UPRV_LENGTHOF(output),
946 &errorCode);
947 if(U_FAILURE(errorCode)) {
948 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
949 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
950 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
951 for(i=0; i<length; ++i) {
952 if(output[i]!=expect[i]) {
953 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
954 break;
955 }
956 }
957 }
958 }
959
960 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
961 static void
TestConcatenate(void)962 TestConcatenate(void) {
963 /* "re + 'sume'" */
964 static const UChar
965 left[]={
966 0x72, 0x65, 0
967 },
968 right[]={
969 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
970 },
971 expect[]={
972 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
973 };
974
975 UChar buffer[100];
976 UErrorCode errorCode;
977 int32_t length;
978
979 /* left with length, right NUL-terminated */
980 errorCode=U_ZERO_ERROR;
981 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
982 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
983 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
984 }
985
986 /* preflighting */
987 errorCode=U_ZERO_ERROR;
988 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
989 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
990 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
991 }
992
993 buffer[2]=0x5555;
994 errorCode=U_ZERO_ERROR;
995 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
996 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
997 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
998 }
999
1000 /* enter with U_FAILURE */
1001 buffer[2]=0xaaaa;
1002 errorCode=U_UNEXPECTED_TOKEN;
1003 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1004 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1005 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1006 }
1007
1008 /* illegal arguments */
1009 buffer[2]=0xaaaa;
1010 errorCode=U_ZERO_ERROR;
1011 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1012 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1013 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1014 }
1015
1016 errorCode=U_ZERO_ERROR;
1017 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1018 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1019 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1020 }
1021 }
1022
/*
 * Code unit 0x2b (plus sign) used as the separator between expected output pieces
 * in the TestNextPrevious() data and recognized by _testIter().
 */
enum { _PLUS=0x2b };
1026
1027 static const char *const _modeString[UNORM_MODE_COUNT]={
1028 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1029 };
1030
1031 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1032 _testIter(const UChar *src, int32_t srcLength,
1033 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1034 const UChar *out, int32_t outLength,
1035 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1036 UChar buffer[4];
1037 const UChar *expect, *outLimit, *in;
1038 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1039 UErrorCode errorCode;
1040 UBool neededToNormalize, expectNeeded;
1041
1042 errorCode=U_ZERO_ERROR;
1043 outLimit=out+outLength;
1044 if(forward) {
1045 expect=out;
1046 i=index=0;
1047 } else {
1048 expect=outLimit;
1049 i=srcIndexesLength-2;
1050 index=srcLength;
1051 }
1052
1053 for(;;) {
1054 prevIndex=index;
1055 if(forward) {
1056 if(!iter->hasNext(iter)) {
1057 return;
1058 }
1059 length=unorm_next(iter,
1060 buffer, UPRV_LENGTHOF(buffer),
1061 mode, 0,
1062 (UBool)(out!=NULL), &neededToNormalize,
1063 &errorCode);
1064 expectIndex=srcIndexes[i+1];
1065 in=src+prevIndex;
1066 inLength=expectIndex-prevIndex;
1067
1068 if(out!=NULL) {
1069 /* get output piece from between plus signs */
1070 expectLength=0;
1071 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1072 ++expectLength;
1073 }
1074 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1075 } else {
1076 expect=in;
1077 expectLength=inLength;
1078 expectNeeded=FALSE;
1079 }
1080 } else {
1081 if(!iter->hasPrevious(iter)) {
1082 return;
1083 }
1084 length=unorm_previous(iter,
1085 buffer, UPRV_LENGTHOF(buffer),
1086 mode, 0,
1087 (UBool)(out!=NULL), &neededToNormalize,
1088 &errorCode);
1089 expectIndex=srcIndexes[i];
1090 in=src+expectIndex;
1091 inLength=prevIndex-expectIndex;
1092
1093 if(out!=NULL) {
1094 /* get output piece from between plus signs */
1095 expectLength=0;
1096 while(expect!=out && expect[-1]!=_PLUS) {
1097 ++expectLength;
1098 --expect;
1099 }
1100 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1101 } else {
1102 expect=in;
1103 expectLength=inLength;
1104 expectNeeded=FALSE;
1105 }
1106 }
1107 index=iter->getIndex(iter, UITER_CURRENT);
1108
1109 if(U_FAILURE(errorCode)) {
1110 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1111 forward, _modeString[mode], i, u_errorName(errorCode));
1112 return;
1113 }
1114 if(expectIndex!=index) {
1115 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1116 forward, _modeString[mode], i, index, expectIndex);
1117 return;
1118 }
1119 if(expectLength!=length) {
1120 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1121 forward, _modeString[mode], i, length, expectLength);
1122 return;
1123 }
1124 if(0!=u_memcmp(expect, buffer, length)) {
1125 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1126 forward, _modeString[mode], i);
1127 return;
1128 }
1129 if(neededToNormalize!=expectNeeded) {
1130 }
1131
1132 if(forward) {
1133 expect+=expectLength+1; /* go after the + */
1134 ++i;
1135 } else {
1136 --expect; /* go before the + */
1137 --i;
1138 }
1139 }
1140 }
1141
1142 static void
TestNextPrevious()1143 TestNextPrevious() {
1144 static const UChar
1145 src[]={ /* input string */
1146 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1147 },
1148 nfd[]={ /* + separates expected output pieces */
1149 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1150 },
1151 nfkd[]={
1152 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1153 },
1154 nfc[]={
1155 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1156 },
1157 nfkc[]={
1158 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1159 },
1160 fcd[]={
1161 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1162 };
1163
1164 /* expected iterator indexes in the source string for each iteration piece */
1165 static const int32_t
1166 nfdIndexes[]={
1167 0, 1, 2, 5, 6, 7
1168 },
1169 nfkdIndexes[]={
1170 0, 1, 2, 5, 6, 7
1171 },
1172 nfcIndexes[]={
1173 0, 1, 2, 5, 6, 7
1174 },
1175 nfkcIndexes[]={
1176 0, 1, 2, 5, 7
1177 },
1178 fcdIndexes[]={
1179 0, 1, 2, 5, 6, 7
1180 };
1181
1182 UCharIterator iter;
1183
1184 UChar buffer[4];
1185 int32_t length;
1186
1187 UBool neededToNormalize;
1188 UErrorCode errorCode;
1189
1190 uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1191
1192 /* test iteration with doNormalize */
1193 iter.index=0;
1194 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1195 iter.index=0;
1196 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1197 iter.index=0;
1198 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1199 iter.index=0;
1200 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1201 iter.index=0;
1202 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1203
1204 iter.index=iter.length;
1205 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1206 iter.index=iter.length;
1207 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1208 iter.index=iter.length;
1209 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1210 iter.index=iter.length;
1211 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1212 iter.index=iter.length;
1213 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1214
1215 /* test iteration without doNormalize */
1216 iter.index=0;
1217 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1218 iter.index=0;
1219 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1220 iter.index=0;
1221 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1222 iter.index=0;
1223 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1224 iter.index=0;
1225 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1226
1227 iter.index=iter.length;
1228 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1229 iter.index=iter.length;
1230 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1231 iter.index=iter.length;
1232 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1233 iter.index=iter.length;
1234 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1235 iter.index=iter.length;
1236 _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1237
1238 /* try without neededToNormalize */
1239 errorCode=U_ZERO_ERROR;
1240 buffer[0]=5;
1241 iter.index=1;
1242 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1243 UNORM_NFD, 0, TRUE, NULL,
1244 &errorCode);
1245 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1246 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1247 return;
1248 }
1249
1250 /* preflight */
1251 neededToNormalize=9;
1252 iter.index=1;
1253 length=unorm_next(&iter, NULL, 0,
1254 UNORM_NFD, 0, TRUE, &neededToNormalize,
1255 &errorCode);
1256 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1257 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1258 return;
1259 }
1260
1261 errorCode=U_ZERO_ERROR;
1262 buffer[0]=buffer[1]=5;
1263 neededToNormalize=9;
1264 iter.index=1;
1265 length=unorm_next(&iter, buffer, 1,
1266 UNORM_NFD, 0, TRUE, &neededToNormalize,
1267 &errorCode);
1268 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1269 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1270 return;
1271 }
1272
1273 /* no iterator */
1274 errorCode=U_ZERO_ERROR;
1275 buffer[0]=buffer[1]=5;
1276 neededToNormalize=9;
1277 iter.index=1;
1278 length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1279 UNORM_NFD, 0, TRUE, &neededToNormalize,
1280 &errorCode);
1281 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1282 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1283 return;
1284 }
1285
1286 /* illegal mode */
1287 buffer[0]=buffer[1]=5;
1288 neededToNormalize=9;
1289 iter.index=1;
1290 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1291 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1292 &errorCode);
1293 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1294 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1295 return;
1296 }
1297
1298 /* error coming in */
1299 errorCode=U_MISPLACED_QUANTIFIER;
1300 buffer[0]=5;
1301 iter.index=1;
1302 length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1303 UNORM_NFD, 0, TRUE, NULL,
1304 &errorCode);
1305 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1306 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1307 return;
1308 }
1309 }
1310
1311 static void
TestFCNFKCClosure(void)1312 TestFCNFKCClosure(void) {
1313 static const struct {
1314 UChar32 c;
1315 const UChar s[6];
1316 } tests[]={
1317 { 0x00C4, { 0 } },
1318 { 0x00E4, { 0 } },
1319 { 0x037A, { 0x0020, 0x03B9, 0 } },
1320 { 0x03D2, { 0x03C5, 0 } },
1321 { 0x20A8, { 0x0072, 0x0073, 0 } },
1322 { 0x210B, { 0x0068, 0 } },
1323 { 0x210C, { 0x0068, 0 } },
1324 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1325 { 0x2122, { 0x0074, 0x006D, 0 } },
1326 { 0x2128, { 0x007A, 0 } },
1327 { 0x1D5DB, { 0x0068, 0 } },
1328 { 0x1D5ED, { 0x007A, 0 } },
1329 { 0x0061, { 0 } }
1330 };
1331
1332 UChar buffer[8];
1333 UErrorCode errorCode;
1334 int32_t i, length;
1335
1336 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1337 errorCode=U_ZERO_ERROR;
1338 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1339 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1340 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1341 }
1342 }
1343
1344 /* error handling */
1345 errorCode=U_ZERO_ERROR;
1346 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1347 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1348 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1349 }
1350
1351 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1352 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1353 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1354 }
1355 }
1356
1357 static void
TestQuickCheckPerCP()1358 TestQuickCheckPerCP() {
1359 UErrorCode errorCode;
1360 UChar32 c, lead, trail;
1361 UChar s[U16_MAX_LENGTH], nfd[16];
1362 int32_t length, lccc1, lccc2, tccc1, tccc2;
1363 int32_t qc1, qc2;
1364
1365 if(
1366 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1371 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1372 ) {
1373 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1374 }
1375
1376 /*
1377 * compare the quick check property values for some code points
1378 * to the quick check results for checking same-code point strings
1379 */
1380 errorCode=U_ZERO_ERROR;
1381 c=0;
1382 while(c<0x110000) {
1383 length=0;
1384 U16_APPEND_UNSAFE(s, length, c);
1385
1386 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1387 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1388 if(qc1!=qc2) {
1389 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1390 }
1391
1392 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1393 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1394 if(qc1!=qc2) {
1395 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1396 }
1397
1398 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1399 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1400 if(qc1!=qc2) {
1401 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1402 }
1403
1404 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1405 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1406 if(qc1!=qc2) {
1407 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1408 }
1409
1410 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1411 /* length-length == 0 is used to get around a compiler warning. */
1412 U16_GET(nfd, 0, length-length, length, lead);
1413 U16_GET(nfd, 0, length-1, length, trail);
1414
1415 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1416 lccc2=u_getCombiningClass(lead);
1417 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1418 tccc2=u_getCombiningClass(trail);
1419
1420 if(lccc1!=lccc2) {
1421 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1422 lccc1, lccc2, c);
1423 }
1424 if(tccc1!=tccc2) {
1425 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1426 tccc1, tccc2, c);
1427 }
1428
1429 /* skip some code points */
1430 c=(20*c)/19+1;
1431 }
1432 }
1433
1434 static void
TestComposition(void)1435 TestComposition(void) {
1436 static const struct {
1437 UNormalizationMode mode;
1438 uint32_t options;
1439 UChar input[12];
1440 UChar expect[12];
1441 } cases[]={
1442 /*
1443 * special cases for UAX #15 bug
1444 * see Unicode Corrigendum #5: Normalization Idempotency
1445 * at http://unicode.org/versions/corrigendum5.html
1446 * (was Public Review Issue #29)
1447 */
1448 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1449 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1450 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1451 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1452
1453 /* TODO: add test cases for UNORM_FCC here (j2151) */
1454 };
1455
1456 UChar output[16];
1457 UErrorCode errorCode;
1458 int32_t i, length;
1459
1460 for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1461 errorCode=U_ZERO_ERROR;
1462 length=unorm_normalize(
1463 cases[i].input, -1,
1464 cases[i].mode, cases[i].options,
1465 output, UPRV_LENGTHOF(output),
1466 &errorCode);
1467 if( U_FAILURE(errorCode) ||
1468 length!=u_strlen(cases[i].expect) ||
1469 0!=u_memcmp(output, cases[i].expect, length)
1470 ) {
1471 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1472 }
1473 }
1474 }
1475
1476 static void
TestGetDecomposition()1477 TestGetDecomposition() {
1478 UChar decomp[32];
1479 int32_t length;
1480
1481 UErrorCode errorCode=U_ZERO_ERROR;
1482 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1483 if(U_FAILURE(errorCode)) {
1484 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1485 return;
1486 }
1487
1488 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1489 if(U_FAILURE(errorCode) || length>=0) {
1490 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1491 }
1492 errorCode=U_ZERO_ERROR;
1493 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1494 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1495 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1496 }
1497 errorCode=U_ZERO_ERROR;
1498 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1500 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1501 }
1502 errorCode=U_ZERO_ERROR;
1503 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1504 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1505 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1506 }
1507 errorCode=U_ZERO_ERROR;
1508 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1509 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1510 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1511 }
1512 errorCode=U_ZERO_ERROR;
1513 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1514 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1515 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1516 }
1517 }
1518
1519 static void
TestGetRawDecomposition()1520 TestGetRawDecomposition() {
1521 UChar decomp[32];
1522 int32_t length;
1523
1524 UErrorCode errorCode=U_ZERO_ERROR;
1525 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1526 if(U_FAILURE(errorCode)) {
1527 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1528 return;
1529 }
1530 /*
1531 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1532 * without recursive decomposition.
1533 */
1534
1535 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1536 if(U_FAILURE(errorCode) || length>=0) {
1537 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1538 }
1539 errorCode=U_ZERO_ERROR;
1540 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1541 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1542 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1543 }
1544 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1545 errorCode=U_ZERO_ERROR;
1546 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1547 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1548 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1549 }
1550 /* U+212B ANGSTROM SIGN */
1551 errorCode=U_ZERO_ERROR;
1552 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1553 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1554 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1555 }
1556 errorCode=U_ZERO_ERROR;
1557 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1558 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1559 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1560 }
1561 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1562 errorCode=U_ZERO_ERROR;
1563 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1564 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1565 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1566 }
1567 errorCode=U_ZERO_ERROR;
1568 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1569 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1570 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1571 }
1572 errorCode=U_ZERO_ERROR;
1573 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1574 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1575 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1576 }
1577 errorCode=U_ZERO_ERROR;
1578 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1579 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1580 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1581 }
1582 }
1583
1584 static void
TestAppendRestoreMiddle()1585 TestAppendRestoreMiddle() {
1586 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1587 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1588 /* NFC: C5 is 'A with ring above' */
1589 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1590 int32_t length;
1591 UErrorCode errorCode=U_ZERO_ERROR;
1592 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1593 if(U_FAILURE(errorCode)) {
1594 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1595 return;
1596 }
1597 /*
1598 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1599 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1600 * still fits into a[] but the full result still overflows this capacity.
1601 * (Let it modify the destination buffer before reallocating internally.)
1602 */
1603 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1604 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1605 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1606 return;
1607 }
1608 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1609 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1610 log_err("unorm2_append(overflow) modified the first string\n");
1611 return;
1612 }
1613 errorCode=U_ZERO_ERROR;
1614 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1615 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1616 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1617 return;
1618 }
1619 }
1620
1621 static void
TestGetEasyToUseInstance()1622 TestGetEasyToUseInstance() {
1623 static const UChar in[]={
1624 0xA0, /* -> <noBreak> 0020 */
1625 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1626 };
1627 UChar out[32];
1628 int32_t length;
1629
1630 UErrorCode errorCode=U_ZERO_ERROR;
1631 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1632 if(U_FAILURE(errorCode)) {
1633 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1634 return;
1635 }
1636 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1637 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1638 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1639 (int)length, u_errorName(errorCode));
1640 }
1641
1642 errorCode=U_ZERO_ERROR;
1643 n2=unorm2_getNFDInstance(&errorCode);
1644 if(U_FAILURE(errorCode)) {
1645 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1646 return;
1647 }
1648 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1649 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1650 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1651 (int)length, u_errorName(errorCode));
1652 }
1653
1654 errorCode=U_ZERO_ERROR;
1655 n2=unorm2_getNFKCInstance(&errorCode);
1656 if(U_FAILURE(errorCode)) {
1657 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1658 return;
1659 }
1660 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1661 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1662 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1663 (int)length, u_errorName(errorCode));
1664 }
1665
1666 errorCode=U_ZERO_ERROR;
1667 n2=unorm2_getNFKDInstance(&errorCode);
1668 if(U_FAILURE(errorCode)) {
1669 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1670 return;
1671 }
1672 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1673 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1674 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1675 (int)length, u_errorName(errorCode));
1676 }
1677
1678 errorCode=U_ZERO_ERROR;
1679 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1680 if(U_FAILURE(errorCode)) {
1681 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1682 return;
1683 }
1684 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1685 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1686 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1687 (int)length, u_errorName(errorCode));
1688 }
1689 }
1690
1691 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1692