• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.dev.test.normalizer;
12 
13 import java.text.StringCharacterIterator;
14 import java.util.Random;
15 
16 import org.junit.Test;
17 import org.junit.runner.RunWith;
18 import org.junit.runners.JUnit4;
19 
20 import ohos.global.icu.dev.test.TestFmwk;
21 import ohos.global.icu.impl.Norm2AllModes;
22 import ohos.global.icu.impl.Normalizer2Impl;
23 import ohos.global.icu.impl.USerializedSet;
24 import ohos.global.icu.impl.Utility;
25 import ohos.global.icu.lang.UCharacter;
26 import ohos.global.icu.lang.UCharacterCategory;
27 import ohos.global.icu.lang.UProperty;
28 import ohos.global.icu.text.FilteredNormalizer2;
29 import ohos.global.icu.text.Normalizer;
30 import ohos.global.icu.text.Normalizer2;
31 import ohos.global.icu.text.UCharacterIterator;
32 import ohos.global.icu.text.UTF16;
33 import ohos.global.icu.text.UnicodeSet;
34 import ohos.global.icu.text.UnicodeSetIterator;
35 
36 
37 
38 
39 @RunWith(JUnit4.class)
40 public class BasicTest extends TestFmwk {
41     String[][] canonTests = {
42         // Input                Decomposed              Composed
43         { "cat",                "cat",                  "cat"               },
44         { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },
45 
46         { "\u1e0a",             "D\u0307",              "\u1e0a"            }, // D-dot_above
47         { "D\u0307",            "D\u0307",              "\u1e0a"            }, // D dot_above
48 
49         { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_below dot_above
50         { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_above dot_below
51         { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      }, // D dot_below dot_above
52 
53         { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
54         { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
55 
56         { "\u1E14",             "E\u0304\u0300",        "\u1E14"            }, // E-macron-grave
57         { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            }, // E-macron + grave
58         { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      }, // E-grave + macron
59 
60         { "\u212b",             "A\u030a",              "\u00c5"            }, // angstrom_sign
61         { "\u00c5",             "A\u030a",              "\u00c5"            }, // A-ring
62 
63         { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
64         { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },
65 
66         { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        }, //updated with 3.0
67         { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     }, //updated with 3.0
68 
69         { "Henry IV",           "Henry IV",             "Henry IV"          },
70         { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },
71 
72         { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
73         { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
74         { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      }, // hw_ka + hw_ten
75         { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      }, // ka + hw_ten
76         { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      }, // hw_ka + ten
77 
78         { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
79         {"\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e","\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165", "\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165"},
80     };
81 
82     String[][] compatTests = {
83             // Input                Decomposed              Composed
84         { "cat",                 "cat",                     "cat"           },
85         { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     }, // Alef-Lamed vs. Alef, Lamed
86 
87         { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },
88         { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        }, // ffi ligature -> f + f + i
89 
90         { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },        //updated for 3.0
91         { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        }, // ffi ligature -> f + f + i
92 
93         { "Henry IV",           "Henry IV",             "Henry IV"          },
94         { "Henry \u2163",       "Henry IV",             "Henry IV"          },
95 
96         { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
97         { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
98 
99         { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + ten
100 
101         /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
102         { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + hw_ten
103         { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // ka + hw_ten
104 
105     };
106 
107     // With Canonical decomposition, Hangul syllables should get decomposed
108     // into Jamo, but Jamo characters should not be decomposed into
109     // conjoining Jamo
110     String[][] hangulCanon = {
111         // Input                Decomposed              Composed
112         { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
113         { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
114     };
115 
116     // With compatibility decomposition turned on,
117     // it should go all the way down to conjoining Jamo characters.
118     // THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
119     String[][] hangulCompat = {
120         // Input        Decomposed                          Composed
121         // { "\ud4db",     "\u1111\u116e\u1175\u11af\u11c2",   "\ud478\u1175\u11af\u11c2"  },
122     };
123 
124     @Test
TestHangulCompose()125     public void TestHangulCompose()
126                 throws Exception{
127         // Make sure that the static composition methods work
128         logln("Canonical composition...");
129         staticTest(Normalizer.NFC, hangulCanon,  2);
130         logln("Compatibility composition...");
131         staticTest(Normalizer.NFKC, hangulCompat, 2);
132         // Now try iterative composition....
133         logln("Iterative composition...");
134         Normalizer norm = new Normalizer("", Normalizer.NFC,0);
135         iterateTest(norm, hangulCanon, 2);
136 
137         norm.setMode(Normalizer.NFKD);
138         iterateTest(norm, hangulCompat, 2);
139 
140         // And finally, make sure you can do it in reverse too
141         logln("Reverse iteration...");
142         norm.setMode(Normalizer.NFC);
143         backAndForth(norm, hangulCanon);
144      }
145 
146     @Test
TestHangulDecomp()147     public void TestHangulDecomp() throws Exception{
148         // Make sure that the static decomposition methods work
149         logln("Canonical decomposition...");
150         staticTest(Normalizer.NFD, hangulCanon,  1);
151         logln("Compatibility decomposition...");
152         staticTest(Normalizer.NFKD, hangulCompat, 1);
153 
154          // Now the iterative decomposition methods...
155         logln("Iterative decomposition...");
156         Normalizer norm = new Normalizer("", Normalizer.NFD,0);
157         iterateTest(norm, hangulCanon, 1);
158 
159         norm.setMode(Normalizer.NFKD);
160         iterateTest(norm, hangulCompat, 1);
161 
162         // And finally, make sure you can do it in reverse too
163         logln("Reverse iteration...");
164         norm.setMode(Normalizer.NFD);
165         backAndForth(norm, hangulCanon);
166     }
167     @Test
TestNone()168     public void TestNone() throws Exception{
169         Normalizer norm = new Normalizer("", Normalizer.NONE,0);
170         iterateTest(norm, canonTests, 0);
171         staticTest(Normalizer.NONE, canonTests, 0);
172     }
173     @Test
TestDecomp()174     public void TestDecomp() throws Exception{
175         Normalizer norm = new Normalizer("", Normalizer.NFD,0);
176         iterateTest(norm, canonTests, 1);
177         staticTest(Normalizer.NFD, canonTests, 1);
178         decomposeTest(Normalizer.NFD, canonTests, 1);
179     }
180 
181     @Test
TestCompatDecomp()182     public void TestCompatDecomp() throws Exception{
183         Normalizer norm = new Normalizer("", Normalizer.NFKD,0);
184         iterateTest(norm, compatTests, 1);
185         staticTest(Normalizer.NFKD,compatTests, 1);
186         decomposeTest(Normalizer.NFKD,compatTests, 1);
187     }
188 
189     @Test
TestCanonCompose()190     public void TestCanonCompose() throws Exception{
191         Normalizer norm = new Normalizer("", Normalizer.NFC,0);
192         staticTest(Normalizer.NFC, canonTests, 2);
193         iterateTest(norm, canonTests, 2);
194         composeTest(Normalizer.NFC, canonTests, 2);
195     }
196 
197     @Test
TestCompatCompose()198     public void TestCompatCompose() throws Exception{
199         Normalizer norm = new Normalizer("", Normalizer.NFKC,0);
200         iterateTest(norm, compatTests, 2);
201         staticTest(Normalizer.NFKC,compatTests, 2);
202         composeTest(Normalizer.NFKC,compatTests, 2);
203     }
204 
205     @Test
TestExplodingBase()206     public void TestExplodingBase() throws Exception{
207         // \u017f - Latin small letter long s
208         // \u0307 - combining dot above
209         // \u1e61 - Latin small letter s with dot above
210         // \u1e9b - Latin small letter long s with dot above
211         String[][] canon = {
212             // Input                Decomposed              Composed
213             { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
214             { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
215         };
216         String[][] compat = {
217             // Input                Decomposed              Composed
218             { "\u017f",        "s",              "s"           },
219             { "\u1e9b",        "s\u0307",        "\u1e61"      },
220         };
221 
222         staticTest(Normalizer.NFD, canon,  1);
223         staticTest(Normalizer.NFC, canon,  2);
224 
225         staticTest(Normalizer.NFKD, compat, 1);
226         staticTest(Normalizer.NFKC, compat, 2);
227 
228     }
229 
230     /**
231      * The Tibetan vowel sign AA, 0f71, was messed up prior to
232      * Unicode version 2.1.9.
233      * Once 2.1.9 or 3.0 is released, uncomment this test.
234      */
235     @Test
TestTibetan()236     public void TestTibetan() throws Exception{
237         String[][] decomp = {
238             { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
239         };
240         String[][] compose = {
241             { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
242         };
243 
244         staticTest(Normalizer.NFD, decomp, 1);
245         staticTest(Normalizer.NFKD,decomp, 2);
246         staticTest(Normalizer.NFC, compose, 1);
247         staticTest(Normalizer.NFKC,compose, 2);
248     }
249 
250     /**
251      * Make sure characters in the CompositionExclusion.txt list do not get
252      * composed to.
253      */
254     @Test
TestCompositionExclusion()255     public void TestCompositionExclusion()
256                 throws Exception{
257         // This list is generated from CompositionExclusion.txt.
258         // Update whenever the normalizer tables are updated.  Note
259         // that we test all characters listed, even those that can be
260         // derived from the Unicode DB and are therefore commented
261         // out.
262         String EXCLUDED =
263             "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
264             "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
265             "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
266             "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
267             "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
268             "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
269             "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB" +
270             "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
271             "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
272             "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
273             "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
274             "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
275             "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
276             "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
277         for (int i=0; i<EXCLUDED.length(); ++i) {
278             String a = String.valueOf(EXCLUDED.charAt(i));
279             String b = Normalizer.normalize(a, Normalizer.NFKD);
280             String c = Normalizer.normalize(b, Normalizer.NFC);
281             if (c.equals(a)) {
282                 errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
283                       hex(b) + " x COMPOSE => " +
284                       hex(c));
285             } else if (isVerbose()) {
286                 logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
287                       hex(b) + " x COMPOSE => " +
288                       hex(c));
289             }
290         }
291         // The following method works too, but it is somewhat
292         // incestuous.  It uses UInfo, which is the same database that
293         // NormalizerBuilder uses, so if something is wrong with
294         // UInfo, the following test won't show it.  All it will show
295         // is that NormalizerBuilder has been run with whatever the
296         // current UInfo is.
297         //
298         // We comment this out in favor of the test above, which
299         // provides independent verification (but also requires
300         // independent updating).
301 //      logln("---");
302 //      UInfo uinfo = new UInfo();
303 //      for (int i=0; i<=0xFFFF; ++i) {
304 //          if (!uinfo.isExcludedComposition((char)i) ||
305 //              (!uinfo.hasCanonicalDecomposition((char)i) &&
306 //               !uinfo.hasCompatibilityDecomposition((char)i))) continue;
307 //          String a = String.valueOf((char)i);
308 //          String b = Normalizer.normalize(a,Normalizer.DECOMP_COMPAT,0);
309 //          String c = Normalizer.normalize(b,Normalizer.COMPOSE,0);
310 //          if (c.equals(a)) {
311 //              errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
312 //                    hex(b) + " x COMPOSE => " +
313 //                    hex(c));
314 //          } else if (isVerbose()) {
315 //              logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
316 //                    hex(b) + " x COMPOSE => " +
317 //                    hex(c));
318 //          }
319 //      }
320     }
321 
322     /**
323      * Test for a problem that showed up just before ICU 1.6 release
324      * having to do with combining characters with an index of zero.
325      * Such characters do not participate in any canonical
326      * decompositions.  However, having an index of zero means that
327      * they all share one typeMask[] entry, that is, they all have to
328      * map to the same canonical class, which is not the case, in
329      * reality.
330      */
331     @Test
TestZeroIndex()332     public void TestZeroIndex()
333                 throws Exception{
334         String[] DATA = {
335             // Expect col1 x COMPOSE_COMPAT => col2
336             // Expect col2 x DECOMP => col3
337             "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
338             "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
339             "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
340             "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
341             "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
342         };
343 
344         for (int i=0; i<DATA.length; i+=3) {
345             String a = DATA[i];
346             String b = Normalizer.normalize(a, Normalizer.NFKC);
347             String exp = DATA[i+1];
348             if (b.equals(exp)) {
349                 logln("Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b));
350             } else {
351                 errln("FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) +
352                       ", expect " + hex(exp));
353             }
354             a = Normalizer.normalize(b, Normalizer.NFD);
355             exp = DATA[i+2];
356             if (a.equals(exp)) {
357                 logln("Ok: " + hex(b) + " x DECOMP => " + hex(a));
358             } else {
359                 errln("FAIL: " + hex(b) + " x DECOMP => " + hex(a) +
360                       ", expect " + hex(exp));
361             }
362         }
363     }
364 
365     /**
366      * Test for a problem found by Verisign.  Problem is that
367      * characters at the start of a string are not put in canonical
368      * order correctly by compose() if there is no starter.
369      */
370     @Test
TestVerisign()371     public void TestVerisign()
372                 throws Exception{
373         String[] inputs = {
374             "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
375             "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
376         };
377         String[] outputs = {
378             "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
379             "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
380         };
381 
382         for (int i = 0; i < inputs.length; ++i) {
383             String input = inputs[i];
384             String output = outputs[i];
385             String result = Normalizer.decompose(input, false);
386             if (!result.equals(output)) {
387                 errln("FAIL input: " + hex(input));
388                 errln(" decompose: " + hex(result));
389                 errln("  expected: " + hex(output));
390             }
391             result = Normalizer.compose(input, false);
392             if (!result.equals(output)) {
393                 errln("FAIL input: " + hex(input));
394                 errln("   compose: " + hex(result));
395                 errln("  expected: " + hex(output));
396             }
397         }
398 
399     }
400     @Test
TestQuickCheckResultNO()401     public void  TestQuickCheckResultNO()
402                  throws Exception{
403         final char CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
404                                 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
405         final char CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
406                                 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
407         final char CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
408                                 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
409         final char CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
410                                 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
411 
412 
413         final int SIZE = 10;
414 
415         int count = 0;
416         for (; count < SIZE; count ++)
417         {
418             if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
419                     Normalizer.NFD,0) != Normalizer.NO)
420             {
421                 errln("ERROR in NFD quick check at U+" +
422                        Integer.toHexString(CPNFD[count]));
423                 return;
424             }
425             if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
426                         Normalizer.NFC,0) !=Normalizer.NO)
427             {
428                 errln("ERROR in NFC quick check at U+"+
429                        Integer.toHexString(CPNFC[count]));
430                 return;
431             }
432             if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
433                                 Normalizer.NFKD,0) != Normalizer.NO)
434             {
435                 errln("ERROR in NFKD quick check at U+"+
436                        Integer.toHexString(CPNFKD[count]));
437                 return;
438             }
439             if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
440                                          Normalizer.NFKC,0) !=Normalizer.NO)
441             {
442                 errln("ERROR in NFKC quick check at U+"+
443                        Integer.toHexString(CPNFKC[count]));
444                 return;
445             }
446             // for improving coverage
447             if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
448                                          Normalizer.NFKC) !=Normalizer.NO)
449             {
450                 errln("ERROR in NFKC quick check at U+"+
451                        Integer.toHexString(CPNFKC[count]));
452                 return;
453             }
454         }
455     }
456 
457 
458     @Test
TestQuickCheckResultYES()459     public void TestQuickCheckResultYES()
460                 throws Exception{
461         final char CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
462                                 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
463         final char CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
464                                 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
465         final char CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
466                                 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
467         final char CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
468                                 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
469 
470         final int SIZE = 10;
471         int count = 0;
472 
473         char cp = 0;
474         while (cp < 0xA0)
475         {
476             if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFD,0)
477                                             != Normalizer.YES)
478             {
479                 errln("ERROR in NFD quick check at U+"+
480                                                       Integer.toHexString(cp));
481                 return;
482             }
483             if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFC,0)
484                                              != Normalizer.YES)
485             {
486                 errln("ERROR in NFC quick check at U+"+
487                                                       Integer.toHexString(cp));
488                 return;
489             }
490             if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKD,0)
491                                              != Normalizer.YES)
492             {
493                 errln("ERROR in NFKD quick check at U+" +
494                                                       Integer.toHexString(cp));
495                 return;
496             }
497             if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC,0)
498                                              != Normalizer.YES)
499             {
500                 errln("ERROR in NFKC quick check at U+"+
501                                                        Integer.toHexString(cp));
502                 return;
503             }
504             // improve the coverage
505             if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC)
506                                              != Normalizer.YES)
507             {
508                 errln("ERROR in NFKC quick check at U+"+
509                                                        Integer.toHexString(cp));
510                 return;
511             }
512             cp++;
513         }
514 
515         for (; count < SIZE; count ++)
516         {
517             if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
518                                          Normalizer.NFD,0)!=Normalizer.YES)
519             {
520                 errln("ERROR in NFD quick check at U+"+
521                                              Integer.toHexString(CPNFD[count]));
522                 return;
523             }
524             if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
525                                          Normalizer.NFC,0)!=Normalizer.YES)
526             {
527                 errln("ERROR in NFC quick check at U+"+
528                                              Integer.toHexString(CPNFC[count]));
529                 return;
530             }
531             if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
532                                          Normalizer.NFKD,0)!=Normalizer.YES)
533             {
534                 errln("ERROR in NFKD quick check at U+"+
535                                     Integer.toHexString(CPNFKD[count]));
536                 return;
537             }
538             if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
539                                          Normalizer.NFKC,0)!=Normalizer.YES)
540             {
541                 errln("ERROR in NFKC quick check at U+"+
542                         Integer.toHexString(CPNFKC[count]));
543                 return;
544             }
545             // improve the coverage
546             if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
547                                          Normalizer.NFKC)!=Normalizer.YES)
548             {
549                 errln("ERROR in NFKC quick check at U+"+
550                         Integer.toHexString(CPNFKC[count]));
551                 return;
552             }
553         }
554     }
555     @Test
TestBengali()556     public void TestBengali() throws Exception{
557         String input = "\u09bc\u09be\u09cd\u09be";
558         String output=Normalizer.normalize(input,Normalizer.NFC);
559         if(!input.equals(output)){
560              errln("ERROR in NFC of string");
561         }
562     }
563     @Test
TestQuickCheckResultMAYBE()564     public void TestQuickCheckResultMAYBE()
565                 throws Exception{
566 
567         final char[] CPNFC = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
568                                 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
569         final char[] CPNFKC = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
570                                 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
571 
572 
573         final int SIZE = 10;
574 
575         int count = 0;
576 
577         /* NFD and NFKD does not have any MAYBE codepoints */
578         for (; count < SIZE; count ++)
579         {
580             if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
581                                         Normalizer.NFC,0)!=Normalizer.MAYBE)
582             {
583                 errln("ERROR in NFC quick check at U+"+
584                                             Integer.toHexString(CPNFC[count]));
585                 return;
586             }
587             if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
588                                        Normalizer.NFKC,0)!=Normalizer.MAYBE)
589             {
590                 errln("ERROR in NFKC quick check at U+"+
591                                             Integer.toHexString(CPNFKC[count]));
592                 return;
593             }
594             if (Normalizer.quickCheck(new char[]{CPNFC[count]},
595                                         Normalizer.NFC,0)!=Normalizer.MAYBE)
596             {
597                 errln("ERROR in NFC quick check at U+"+
598                                             Integer.toHexString(CPNFC[count]));
599                 return;
600             }
601             if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
602                                        Normalizer.NFKC,0)!=Normalizer.MAYBE)
603             {
604                 errln("ERROR in NFKC quick check at U+"+
605                                             Integer.toHexString(CPNFKC[count]));
606                 return;
607             }
608             if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
609                                        Normalizer.NONE,0)!=Normalizer.YES)
610             {
611                 errln("ERROR in NONE quick check at U+"+
612                                             Integer.toHexString(CPNFKC[count]));
613                 return;
614             }
615         }
616     }
617 
618     @Test
TestQuickCheckStringResult()619     public void TestQuickCheckStringResult()
620                 throws Exception{
621         int count;
622         String d;
623         String c;
624 
625         for (count = 0; count < canonTests.length; count ++)
626         {
627             d = canonTests[count][1];
628             c = canonTests[count][2];
629             if (Normalizer.quickCheck(d,Normalizer.NFD,0)
630                                             != Normalizer.YES)
631             {
632                 errln("ERROR in NFD quick check for string at count " + count);
633                 return;
634             }
635 
636             if (Normalizer.quickCheck(c, Normalizer.NFC,0)
637                                             == Normalizer.NO)
638             {
639                 errln("ERROR in NFC quick check for string at count " + count);
640                 return;
641             }
642         }
643 
644         for (count = 0; count < compatTests.length; count ++)
645         {
646             d = compatTests[count][1];
647             c = compatTests[count][2];
648             if (Normalizer.quickCheck(d, Normalizer.NFKD,0)
649                                             != Normalizer.YES)
650             {
651                 errln("ERROR in NFKD quick check for string at count " + count);
652                 return;
653             }
654 
655             if (Normalizer.quickCheck(c,  Normalizer.NFKC,0)
656                                             != Normalizer.YES)
657             {
658                 errln("ERROR in NFKC quick check for string at count " + count);
659                 return;
660             }
661         }
662     }
663 
qcToInt(Normalizer.QuickCheckResult qc)664     static final int qcToInt(Normalizer.QuickCheckResult qc) {
665         if(qc==Normalizer.NO) {
666             return 0;
667         } else if(qc==Normalizer.YES) {
668             return 1;
669         } else /* Normalizer.MAYBE */ {
670             return 2;
671         }
672     }
673 
674     @Test
TestQuickCheckPerCP()675     public void TestQuickCheckPerCP() {
676         int c, lead, trail;
677         String s, nfd;
678         int lccc1, lccc2, tccc1, tccc2;
679         int qc1, qc2;
680 
681         if(
682             UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
683             UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
684             UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
685             UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2 ||
686             UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS) ||
687             UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
688         ) {
689             errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
690         }
691 
692         /*
693          * compare the quick check property values for some code points
694          * to the quick check results for checking same-code point strings
695          */
696         c=0;
697         while(c<0x110000) {
698             s=UTF16.valueOf(c);
699 
700             qc1=UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
701             qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
702             if(qc1!=qc2) {
703                 errln("getIntPropertyValue(NFC)="+qc1+" != "+qc2+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
704             }
705 
706             qc1=UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
707             qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
708             if(qc1!=qc2) {
709                 errln("getIntPropertyValue(NFD)="+qc1+" != "+qc2+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
710             }
711 
712             qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
713             qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
714             if(qc1!=qc2) {
715                 errln("getIntPropertyValue(NFKC)="+qc1+" != "+qc2+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
716             }
717 
718             qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
719             qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
720             if(qc1!=qc2) {
721                 errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
722             }
723 
724             nfd=Normalizer.normalize(s, Normalizer.NFD);
725             lead=UTF16.charAt(nfd, 0);
726             trail=UTF16.charAt(nfd, nfd.length()-1);
727 
728             lccc1=UCharacter.getIntPropertyValue(c, UProperty.LEAD_CANONICAL_COMBINING_CLASS);
729             lccc2=UCharacter.getCombiningClass(lead);
730             tccc1=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
731             tccc2=UCharacter.getCombiningClass(trail);
732 
733             if(lccc1!=lccc2) {
734                 errln("getIntPropertyValue(lccc)="+lccc1+" != "+lccc2+"=getCombiningClass(lead) for U+"+Integer.toHexString(c));
735             }
736             if(tccc1!=tccc2) {
737                 errln("getIntPropertyValue(tccc)="+tccc1+" != "+tccc2+"=getCombiningClass(trail) for U+"+Integer.toHexString(c));
738             }
739 
740             /* skip some code points */
741             c=(20*c)/19+1;
742         }
743     }
744 
    //------------------------------------------------------------------------
    // Internal utilities
    //------------------------------------------------------------------------
751 
752 /*    private void backAndForth(Normalizer iter, String input)
753     {
754         iter.setText(input);
755 
756         // Run through the iterator forwards and stick it into a StringBuffer
757         StringBuffer forward =  new StringBuffer();
758         for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
759             forward.append(ch);
760         }
761 
762         // Now do it backwards
763         StringBuffer reverse = new StringBuffer();
764         for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
765             reverse.insert(0, ch);
766         }
767 
768         if (!forward.toString().equals(reverse.toString())) {
769             errln("FAIL: Forward/reverse mismatch for input " + hex(input)
770                   + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
771         } else if (isVerbose()) {
772             logln("Ok: Forward/reverse for input " + hex(input)
773                   + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
774         }
775     }*/
776 
backAndForth(Normalizer iter, String[][] tests)777     private void backAndForth(Normalizer iter, String[][] tests)
778     {
779         for (int i = 0; i < tests.length; i++)
780         {
781             iter.setText(tests[i][0]);
782 
783             // Run through the iterator forwards and stick it into a
784             // StringBuffer
785             StringBuffer forward =  new StringBuffer();
786             for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
787                 forward.append(ch);
788             }
789 
790             // Now do it backwards
791             StringBuffer reverse = new StringBuffer();
792             for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
793                 reverse.insert(0, ch);
794             }
795 
796             if (!forward.toString().equals(reverse.toString())) {
797                 errln("FAIL: Forward/reverse mismatch for input "
798                     + hex(tests[i][0]) + ", forward: " + hex(forward)
799                     + ", backward: " + hex(reverse));
800             } else if (isVerbose()) {
801                 logln("Ok: Forward/reverse for input " + hex(tests[i][0])
802                       + ", forward: " + hex(forward) + ", backward: "
803                       + hex(reverse));
804             }
805         }
806     }
807 
staticTest(Normalizer.Mode mode, String[][] tests, int outCol)808     private void staticTest (Normalizer.Mode mode,
809                              String[][] tests, int outCol) throws Exception{
810         for (int i = 0; i < tests.length; i++)
811         {
812             String input = Utility.unescape(tests[i][0]);
813             String expect = Utility.unescape(tests[i][outCol]);
814 
815             logln("Normalizing '" + input + "' (" + hex(input) + ")" );
816 
817             String output = Normalizer.normalize(input, mode);
818 
819             if (!output.equals(expect)) {
820                 errln("FAIL: case " + i
821                     + " expected '" + expect + "' (" + hex(expect) + ")"
822                     + " but got '" + output + "' (" + hex(output) + ")" );
823             }
824         }
825         char[] output = new char[1];
826         for (int i = 0; i < tests.length; i++)
827         {
828             char[] input = Utility.unescape(tests[i][0]).toCharArray();
829             String expect =Utility.unescape( tests[i][outCol]);
830 
831             logln("Normalizing '" + new String(input) + "' (" +
832                         hex(new String(input)) + ")" );
833             int reqLength=0;
834             while(true){
835                 try{
836                     reqLength=Normalizer.normalize(input,output, mode,0);
837                     if(reqLength<=output.length    ){
838                         break;
839                     }
840                 }catch(IndexOutOfBoundsException e){
841                     output= new char[Integer.parseInt(e.getMessage())];
842                     continue;
843                 }
844             }
845             if (!expect.equals(new String(output,0,reqLength))) {
846                 errln("FAIL: case " + i
847                     + " expected '" + expect + "' (" + hex(expect) + ")"
848                     + " but got '" + new String(output)
849                     + "' ("  + hex(new String(output)) + ")" );
850             }
851         }
852     }
decomposeTest(Normalizer.Mode mode, String[][] tests, int outCol)853     private void decomposeTest(Normalizer.Mode mode,
854                              String[][] tests, int outCol) throws Exception{
855         for (int i = 0; i < tests.length; i++)
856         {
857             String input = Utility.unescape(tests[i][0]);
858             String expect = Utility.unescape(tests[i][outCol]);
859 
860             logln("Normalizing '" + input + "' (" + hex(input) + ")" );
861 
862             String output = Normalizer.decompose(input, mode==Normalizer.NFKD);
863 
864             if (!output.equals(expect)) {
865                 errln("FAIL: case " + i
866                     + " expected '" + expect + "' (" + hex(expect) + ")"
867                     + " but got '" + output + "' (" + hex(output) + ")" );
868             }
869         }
870         char[] output = new char[1];
871         for (int i = 0; i < tests.length; i++)
872         {
873             char[] input = Utility.unescape(tests[i][0]).toCharArray();
874             String expect = Utility.unescape(tests[i][outCol]);
875 
876             logln("Normalizing '" + new String(input) + "' (" +
877                         hex(new String(input)) + ")" );
878             int reqLength=0;
879             while(true){
880                 try{
881                     reqLength=Normalizer.decompose(input,output, mode==Normalizer.NFKD,0);
882                     if(reqLength<=output.length ){
883                         break;
884                     }
885                 }catch(IndexOutOfBoundsException e){
886                     output= new char[Integer.parseInt(e.getMessage())];
887                     continue;
888                 }
889             }
890             if (!expect.equals(new String(output,0,reqLength))) {
891                 errln("FAIL: case " + i
892                     + " expected '" + expect + "' (" + hex(expect) + ")"
893                     + " but got '" + new String(output)
894                     + "' ("  + hex(new String(output)) + ")" );
895             }
896         }
897         output = new char[1];
898         for (int i = 0; i < tests.length; i++)
899         {
900            char[] input = Utility.unescape(tests[i][0]).toCharArray();
901            String expect = Utility.unescape(tests[i][outCol]);
902 
903            logln("Normalizing '" + new String(input) + "' (" +
904                        hex(new String(input)) + ")" );
905            int reqLength=0;
906            while(true){
907                try{
908                    reqLength=Normalizer.decompose(input,0,input.length,output,0,output.length, mode==Normalizer.NFKD,0);
909                    if(reqLength<=output.length ){
910                        break;
911                    }
912                }catch(IndexOutOfBoundsException e){
913                    output= new char[Integer.parseInt(e.getMessage())];
914                    continue;
915                }
916            }
917            if (!expect.equals(new String(output,0,reqLength))) {
918                errln("FAIL: case " + i
919                    + " expected '" + expect + "' (" + hex(expect) + ")"
920                    + " but got '" + new String(output)
921                    + "' ("  + hex(new String(output)) + ")" );
922            }
923            char[] output2 = new char[reqLength * 2];
924            System.arraycopy(output, 0, output2, 0, reqLength);
925            int retLength = Normalizer.decompose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
926            if(retLength != reqLength){
927                logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
928            }
929         }
930     }
931 
composeTest(Normalizer.Mode mode, String[][] tests, int outCol)932     private void composeTest(Normalizer.Mode mode,
933                              String[][] tests, int outCol) throws Exception{
934         for (int i = 0; i < tests.length; i++)
935         {
936             String input = Utility.unescape(tests[i][0]);
937             String expect = Utility.unescape(tests[i][outCol]);
938 
939             logln("Normalizing '" + input + "' (" + hex(input) + ")" );
940 
941             String output = Normalizer.compose(input, mode==Normalizer.NFKC);
942 
943             if (!output.equals(expect)) {
944                 errln("FAIL: case " + i
945                     + " expected '" + expect + "' (" + hex(expect) + ")"
946                     + " but got '" + output + "' (" + hex(output) + ")" );
947             }
948         }
949         char[] output = new char[1];
950         for (int i = 0; i < tests.length; i++)
951         {
952             char[] input = Utility.unescape(tests[i][0]).toCharArray();
953             String expect = Utility.unescape(tests[i][outCol]);
954 
955             logln("Normalizing '" + new String(input) + "' (" +
956                         hex(new String(input)) + ")" );
957             int reqLength=0;
958             while(true){
959                 try{
960                     reqLength=Normalizer.compose(input,output, mode==Normalizer.NFKC,0);
961                     if(reqLength<=output.length ){
962                         break;
963                     }
964                 }catch(IndexOutOfBoundsException e){
965                     output= new char[Integer.parseInt(e.getMessage())];
966                     continue;
967                 }
968             }
969             if (!expect.equals(new String(output,0,reqLength))) {
970                 errln("FAIL: case " + i
971                     + " expected '" + expect + "' (" + hex(expect) + ")"
972                     + " but got '" + new String(output)
973                     + "' ("  + hex(new String(output)) + ")" );
974             }
975         }
976         output = new char[1];
977         for (int i = 0; i < tests.length; i++)
978         {
979             char[] input = Utility.unescape(tests[i][0]).toCharArray();
980             String expect = Utility.unescape(tests[i][outCol]);
981 
982             logln("Normalizing '" + new String(input) + "' (" +
983                         hex(new String(input)) + ")" );
984             int reqLength=0;
985             while(true){
986                 try{
987                     reqLength=Normalizer.compose(input,0,input.length, output, 0, output.length, mode==Normalizer.NFKC,0);
988                     if(reqLength<=output.length ){
989                         break;
990                     }
991                 }catch(IndexOutOfBoundsException e){
992                     output= new char[Integer.parseInt(e.getMessage())];
993                     continue;
994                 }
995             }
996             if (!expect.equals(new String(output,0,reqLength))) {
997                 errln("FAIL: case " + i
998                     + " expected '" + expect + "' (" + hex(expect) + ")"
999                     + " but got '" + new String(output)
1000                     + "' ("  + hex(new String(output)) + ")" );
1001             }
1002 
1003             char[] output2 = new char[reqLength * 2];
1004             System.arraycopy(output, 0, output2, 0, reqLength);
1005             int retLength = Normalizer.compose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
1006             if(retLength != reqLength){
1007                 logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
1008             }
1009         }
1010     }
iterateTest(Normalizer iter, String[][] tests, int outCol)1011     private void iterateTest(Normalizer iter, String[][] tests, int outCol){
1012         for (int i = 0; i < tests.length; i++)
1013         {
1014             String input = Utility.unescape(tests[i][0]);
1015             String expect = Utility.unescape(tests[i][outCol]);
1016 
1017             logln("Normalizing '" + input + "' (" + hex(input) + ")" );
1018 
1019             iter.setText(input);
1020             assertEqual(expect, iter, "case " + i + " ");
1021         }
1022     }
1023 
assertEqual(String expected, Normalizer iter, String msg)1024     private void assertEqual(String expected, Normalizer iter, String msg)
1025     {
1026         int index = 0;
1027         int ch;
1028         UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1029 
1030         while ((ch=iter.next())!= Normalizer.DONE){
1031             if (index >= expected.length()) {
1032                 errln("FAIL: " + msg + "Unexpected character '" + (char)ch
1033                         + "' (" + hex(ch) + ")"
1034                         + " at index " + index);
1035                 break;
1036             }
1037             int want = UTF16.charAt(expected,index);
1038             if (ch != want) {
1039                 errln("FAIL: " + msg + "got '" + (char)ch
1040                         + "' (" + hex(ch) + ")"
1041                         + " but expected '" + want + "' (" + hex(want)+ ")"
1042                         + " at index " + index);
1043             }
1044             index+=  UTF16.getCharCount(ch);
1045         }
1046         if (index < expected.length()) {
1047             errln("FAIL: " + msg + "Only got " + index + " chars, expected "
1048             + expected.length());
1049         }
1050 
1051         cIter.setToLimit();
1052         while((ch=iter.previous())!=Normalizer.DONE){
1053             int want = cIter.previousCodePoint();
1054             if (ch != want ) {
1055                 errln("FAIL: " + msg + "got '" + (char)ch
1056                         + "' (" + hex(ch) + ")"
1057                         + " but expected '" + want + "' (" + hex(want) + ")"
1058                         + " at index " + index);
1059             }
1060         }
1061     }
1062     //--------------------------------------------------------------------------
1063 
1064     // NOTE: These tests are used for quick debugging so are not ported
1065     // to ICU4C tsnorm.cpp in intltest
1066     //
1067 
1068     @Test
TestDebugStatic()1069     public void TestDebugStatic(){
1070         String in = Utility.unescape("\\U0001D157\\U0001D165");
1071         if(!Normalizer.isNormalized(in,Normalizer.NFC,0)){
1072             errln("isNormalized failed");
1073         }
1074 
1075         String input  =  "\uAD8B\uAD8B\uAD8B\uAD8B"+
1076             "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1077             "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1078             "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1079             "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1080             "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1081             "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1082             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1083             "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
1084             "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1085             "\uAD8B\uAD8B\uAD8B\uAD8B"+
1086             "d\u031B\u0307\u0323";
1087         String expect = "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1088                         "\u11AA\u1100\u116F\u11AA\uD834\uDD57\uD834\uDD65"+
1089                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1090                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1091                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1092                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1093                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1094                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1095                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1096                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1097                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1098                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1099                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1100                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1101                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1102                         "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1103                         "\uD834\uDD57\uD834\uDD65aaaaaaaaaaaaaaaaaazzzzzz"+
1104                         "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1105                         "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1106                         "bbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccc"+
1107                         "cccccccccccccccccccccccccccccccccccccccccccccccc"+
1108                         "ddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1109                         "dddddddddddddddddddddddd"+
1110                         "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1111                         "\u11AA\u1100\u116F\u11AA\u0064\u031B\u0323\u0307";
1112             String output = Normalizer.normalize(Utility.unescape(input),
1113                             Normalizer.NFD);
1114             if(!expect.equals(output)){
1115                 errln("FAIL expected: "+hex(expect) + " got: "+hex(output));
1116             }
1117 
1118 
1119 
1120     }
1121     @Test
TestDebugIter()1122     public void TestDebugIter(){
1123         String src = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1124         String expected = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1125         Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(src)),
1126                                                 Normalizer.NONE,0);
1127         int index = 0;
1128         int ch;
1129         UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1130 
1131         while ((ch=iter.next())!= Normalizer.DONE){
1132             if (index >= expected.length()) {
1133                 errln("FAIL: " +  "Unexpected character '" + (char)ch
1134                         + "' (" + hex(ch) + ")"
1135                         + " at index " + index);
1136                 break;
1137             }
1138             int want = UTF16.charAt(expected,index);
1139             if (ch != want) {
1140                 errln("FAIL: " +  "got '" + (char)ch
1141                         + "' (" + hex(ch) + ")"
1142                         + " but expected '" + want + "' (" + hex(want)+ ")"
1143                         + " at index " + index);
1144             }
1145             index+=  UTF16.getCharCount(ch);
1146         }
1147         if (index < expected.length()) {
1148             errln("FAIL: " +  "Only got " + index + " chars, expected "
1149             + expected.length());
1150         }
1151 
1152         cIter.setToLimit();
1153         while((ch=iter.previous())!=Normalizer.DONE){
1154             int want = cIter.previousCodePoint();
1155             if (ch != want ) {
1156                 errln("FAIL: " + "got '" + (char)ch
1157                         + "' (" + hex(ch) + ")"
1158                         + " but expected '" + want + "' (" + hex(want) + ")"
1159                         + " at index " + index);
1160             }
1161         }
1162     }
1163     @Test
TestDebugIterOld()1164     public void TestDebugIterOld(){
1165         String input = "\\U0001D15E";
1166         String expected = "\uD834\uDD57\uD834\uDD65";
1167         String expectedReverse = "\uD834\uDD65\uD834\uDD57";
1168         int index = 0;
1169         int ch;
1170         Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(input)),
1171                                                 Normalizer.NFKC,0);
1172         StringBuffer got = new StringBuffer();
1173         for (ch = iter.first();ch!=Normalizer.DONE;ch=iter.next())
1174         {
1175             if (index >= expected.length()) {
1176                 errln("FAIL: " +  "Unexpected character '" + (char)ch +
1177                        "' (" + hex(ch) + ")" + " at index " + index);
1178                 break;
1179             }
1180             got.append(UCharacter.toString(ch));
1181             index++;
1182         }
1183         if (!expected.equals(got.toString())) {
1184                 errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1185                         + " but expected '" + expected + "' ("
1186                         + hex(expected) + ")");
1187         }
1188         if (got.length() < expected.length()) {
1189             errln("FAIL: " +  "Only got " + index + " chars, expected "
1190                            + expected.length());
1191         }
1192 
1193         logln("Reverse Iteration\n");
1194         iter.setIndexOnly(iter.endIndex());
1195         got.setLength(0);
1196         for(ch=iter.previous();ch!=Normalizer.DONE;ch=iter.previous()){
1197             if (index >= expected.length()) {
1198                 errln("FAIL: " +  "Unexpected character '" + (char)ch
1199                                + "' (" + hex(ch) + ")" + " at index " + index);
1200                 break;
1201             }
1202             got.append(UCharacter.toString(ch));
1203         }
1204         if (!expectedReverse.equals(got.toString())) {
1205                 errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1206                                + " but expected '" + expected
1207                                + "' (" + hex(expected) + ")");
1208         }
1209         if (got.length() < expected.length()) {
1210             errln("FAIL: " +  "Only got " + index + " chars, expected "
1211                       + expected.length());
1212         }
1213 
1214     }
1215     //--------------------------------------------------------------------------
1216     // helper class for TestPreviousNext()
1217     // simple UTF-32 character iterator
1218     class UCharIterator {
1219 
UCharIterator(int[] src, int len, int index)1220        public UCharIterator(int[] src, int len, int index){
1221 
1222             s=src;
1223             length=len;
1224             i=index;
1225        }
1226 
current()1227         public int current() {
1228             if(i<length) {
1229                 return s[i];
1230             } else {
1231                 return -1;
1232             }
1233         }
1234 
next()1235         public int next() {
1236             if(i<length) {
1237                 return s[i++];
1238             } else {
1239                 return -1;
1240             }
1241         }
1242 
previous()1243         public int previous() {
1244             if(i>0) {
1245                 return s[--i];
1246             } else {
1247                 return -1;
1248             }
1249         }
1250 
getIndex()1251         public int getIndex() {
1252             return i;
1253         }
1254 
1255         private int[] s;
1256         private int length, i;
1257     }
1258     @Test
TestPreviousNext()1259     public void TestPreviousNext() {
1260         // src and expect strings
1261         char src[]={
1262             UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1263             UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1264             0xc4,
1265             0x1ed0
1266         };
1267         int expect[]={
1268             0x831d,
1269             0x1d158, 0x1d165,
1270             0x41, 0x308,
1271             0x4f, 0x302, 0x301
1272         };
1273 
1274         // expected src indexes corresponding to expect indexes
1275         int expectIndex[]={
1276             0,
1277             2, 2,
1278             4, 4,
1279             5, 5, 5,
1280             6 // behind last character
1281         };
1282 
1283         // initial indexes into the src and expect strings
1284 
1285         final int SRC_MIDDLE=4;
1286         final int EXPECT_MIDDLE=3;
1287 
1288 
1289         // movement vector
1290         // - for previous(), 0 for current(), + for next()
1291         // not const so that we can terminate it below for the error message
1292         String moves="0+0+0--0-0-+++0--+++++++0--------";
1293 
1294         // iterators
1295         Normalizer iter = new Normalizer(new String(src),
1296                                                 Normalizer.NFD,0);
1297         UCharIterator iter32 = new UCharIterator(expect, expect.length,
1298                                                      EXPECT_MIDDLE);
1299 
1300         int c1, c2;
1301         char m;
1302 
1303         // initially set the indexes into the middle of the strings
1304         iter.setIndexOnly(SRC_MIDDLE);
1305 
1306         // move around and compare the iteration code points with
1307         // the expected ones
1308         int movesIndex =0;
1309         while(movesIndex<moves.length()) {
1310             m=moves.charAt(movesIndex++);
1311             if(m=='-') {
1312                 c1=iter.previous();
1313                 c2=iter32.previous();
1314             } else if(m=='0') {
1315                 c1=iter.current();
1316                 c2=iter32.current();
1317             } else /* m=='+' */ {
1318                 c1=iter.next();
1319                 c2=iter32.next();
1320             }
1321 
1322             // compare results
1323             if(c1!=c2) {
1324                 // copy the moves until the current (m) move, and terminate
1325                 String history = moves.substring(0,movesIndex);
1326                 errln("error: mismatch in Normalizer iteration at "+history+": "
1327                       +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1328                 break;
1329             }
1330 
1331             // compare indexes
1332             if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1333                 // copy the moves until the current (m) move, and terminate
1334                 String history = moves.substring(0,movesIndex);
1335                 errln("error: index mismatch in Normalizer iteration at "
1336                       +history+ " : "+ "Normalizer index " +iter.getIndex()
1337                       +" expected "+ expectIndex[iter32.getIndex()]);
1338                 break;
1339             }
1340         }
1341     }
1342     // Only in ICU4j
1343     @Test
TestPreviousNextJCI()1344     public void TestPreviousNextJCI() {
1345         // src and expect strings
1346         char src[]={
1347             UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1348             UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1349             0xc4,
1350             0x1ed0
1351         };
1352         int expect[]={
1353             0x831d,
1354             0x1d158, 0x1d165,
1355             0x41, 0x308,
1356             0x4f, 0x302, 0x301
1357         };
1358 
1359         // expected src indexes corresponding to expect indexes
1360         int expectIndex[]={
1361             0,
1362             2, 2,
1363             4, 4,
1364             5, 5, 5,
1365             6 // behind last character
1366         };
1367 
1368         // initial indexes into the src and expect strings
1369 
1370         final int SRC_MIDDLE=4;
1371         final int EXPECT_MIDDLE=3;
1372 
1373 
1374         // movement vector
1375         // - for previous(), 0 for current(), + for next()
1376         // not const so that we can terminate it below for the error message
1377         String moves="0+0+0--0-0-+++0--+++++++0--------";
1378 
1379         // iterators
1380         StringCharacterIterator text = new StringCharacterIterator(new String(src));
1381         Normalizer iter = new Normalizer(text,Normalizer.NFD,0);
1382         UCharIterator iter32 = new UCharIterator(expect, expect.length,
1383                                                      EXPECT_MIDDLE);
1384 
1385         int c1, c2;
1386         char m;
1387 
1388         // initially set the indexes into the middle of the strings
1389         iter.setIndexOnly(SRC_MIDDLE);
1390 
1391         // move around and compare the iteration code points with
1392         // the expected ones
1393         int movesIndex =0;
1394         while(movesIndex<moves.length()) {
1395             m=moves.charAt(movesIndex++);
1396             if(m=='-') {
1397                 c1=iter.previous();
1398                 c2=iter32.previous();
1399             } else if(m=='0') {
1400                 c1=iter.current();
1401                 c2=iter32.current();
1402             } else /* m=='+' */ {
1403                 c1=iter.next();
1404                 c2=iter32.next();
1405             }
1406 
1407             // compare results
1408             if(c1!=c2) {
1409                 // copy the moves until the current (m) move, and terminate
1410                 String history = moves.substring(0,movesIndex);
1411                 errln("error: mismatch in Normalizer iteration at "+history+": "
1412                       +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1413                 break;
1414             }
1415 
1416             // compare indexes
1417             if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1418                 // copy the moves until the current (m) move, and terminate
1419                 String history = moves.substring(0,movesIndex);
1420                 errln("error: index mismatch in Normalizer iteration at "
1421                       +history+ " : "+ "Normalizer index " +iter.getIndex()
1422                       +" expected "+ expectIndex[iter32.getIndex()]);
1423                 break;
1424             }
1425         }
1426     }
1427 
1428     // test APIs that are not otherwise used - improve test coverage
1429     @Test
TestNormalizerAPI()1430     public void TestNormalizerAPI() throws Exception {
1431         try{
1432             // instantiate a Normalizer from a CharacterIterator
1433             String s=Utility.unescape("a\u0308\uac00\\U0002f800");
1434             // make s a bit longer and more interesting
1435             UCharacterIterator iter = UCharacterIterator.getInstance(s+s);
1436             Normalizer norm = new Normalizer(iter, Normalizer.NFC,0);
1437             if(norm.next()!=0xe4) {
1438                 errln("error in Normalizer(CharacterIterator).next()");
1439             }
1440 
1441             // test clone(), ==, and hashCode()
1442             Normalizer clone=(Normalizer)norm.clone();
1443             if(clone.equals(norm)) {
1444                 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=norm");
1445             }
1446 
1447             if(clone.getLength()!= norm.getLength()){
1448                errln("error in Normalizer.getBeginIndex()");
1449             }
1450             // clone must have the same hashCode()
1451             //if(clone.hashCode()!=norm.hashCode()) {
1452             //    errln("error in Normalizer(Normalizer(CharacterIterator)).clone().hashCode()!=copy.hashCode()");
1453             //}
1454             if(clone.next()!=0xac00) {
1455                 errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
1456             }
1457             int ch = clone.next();
1458             if(ch!=0x4e3d) {
1459                 errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next()");
1460             }
1461             // position changed, must change hashCode()
1462             if(clone.hashCode()==norm.hashCode()) {
1463                 errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next().hashCode()==copy.hashCode()");
1464             }
1465 
1466             // test compose() and decompose()
1467             StringBuffer tel;
1468             String nfkc, nfkd;
1469             tel=new StringBuffer("\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121");
1470             tel.insert(1,(char)0x0301);
1471 
1472             nfkc=Normalizer.compose(tel.toString(), true);
1473             nfkd=Normalizer.decompose(tel.toString(), true);
1474             if(
1475                 !nfkc.equals(Utility.unescape("TE\u0139TELTELTELTELTELTELTELTELTEL"))||
1476                 !nfkd.equals(Utility.unescape("TEL\u0301TELTELTELTELTELTELTELTELTEL"))
1477             ) {
1478                 errln("error in Normalizer::(de)compose(): wrong result(s)");
1479             }
1480 
1481             // test setIndex()
1482             ch=norm.setIndex(3);
1483             if(ch!=0x4e3d) {
1484                errln("error in Normalizer(CharacterIterator).setIndex(3)");
1485             }
1486 
1487             // test setText(CharacterIterator) and getText()
1488             String out, out2;
1489             clone.setText(iter);
1490 
1491             out = clone.getText();
1492             out2 = iter.getText();
1493             if( !out.equals(out2) ||
1494                 clone.startIndex()!=0||
1495                 clone.endIndex()!=iter.getLength()
1496             ) {
1497                 errln("error in Normalizer::setText() or Normalizer::getText()");
1498             }
1499 
1500             char[] fillIn1 = new char[clone.getLength()];
1501             char[] fillIn2 = new char[iter.getLength()];
1502             int len = clone.getText(fillIn1);
1503             iter.getText(fillIn2,0);
1504             if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1505                 errln("error in Normalizer.getText(). Normalizer: "+
1506                                 Utility.hex(new String(fillIn1))+
1507                                 " Iter: " + Utility.hex(new String(fillIn2)));
1508             }
1509 
1510             clone.setText(fillIn1);
1511             len = clone.getText(fillIn2);
1512             if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1513                 errln("error in Normalizer.setText() or Normalizer.getText()"+
1514                                 Utility.hex(new String(fillIn1))+
1515                                 " Iter: " + Utility.hex(new String(fillIn2)));
1516             }
1517 
1518             // test setText(UChar *), getUMode() and setMode()
1519             clone.setText(s);
1520             clone.setIndexOnly(1);
1521             clone.setMode(Normalizer.NFD);
1522             if(clone.getMode()!=Normalizer.NFD) {
1523                 errln("error in Normalizer::setMode() or Normalizer::getMode()");
1524             }
1525             if(clone.next()!=0x308 || clone.next()!=0x1100) {
1526                 errln("error in Normalizer::setText() or Normalizer::setMode()");
1527             }
1528 
1529             // test last()/previous() with an internal buffer overflow
1530             StringBuffer buf = new StringBuffer("aaaaaaaaaa");
1531             buf.setCharAt(10-1,'\u0308');
1532             clone.setText(buf);
1533             if(clone.last()!=0x308) {
1534                 errln("error in Normalizer(10*U+0308).last()");
1535             }
1536 
1537             // test UNORM_NONE
1538             norm.setMode(Normalizer.NONE);
1539             if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
1540                 errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
1541             }
1542             out=Normalizer.normalize(s, Normalizer.NONE);
1543             if(!out.equals(s)) {
1544                 errln("error in Normalizer::normalize(UNORM_NONE)");
1545             }
1546             ch = 0x1D15E;
1547             String exp = "\\U0001D157\\U0001D165";
1548             String ns = Normalizer.normalize(ch,Normalizer.NFC);
1549             if(!ns.equals(Utility.unescape(exp))){
1550                 errln("error in Normalizer.normalize(int,Mode)");
1551             }
1552             ns = Normalizer.normalize(ch,Normalizer.NFC,0);
1553             if(!ns.equals(Utility.unescape(exp))){
1554                 errln("error in Normalizer.normalize(int,Mode,int)");
1555             }
1556         }catch(Exception e){
1557             throw e;
1558         }
1559     }
1560 
1561     @Test
TestConcatenate()1562     public void TestConcatenate() {
1563 
1564         Object[][]cases=new Object[][]{
1565             /* mode, left, right, result */
1566             {
1567                 Normalizer.NFC,
1568                 "re",
1569                 "\u0301sum\u00e9",
1570                 "r\u00e9sum\u00e9"
1571             },
1572             {
1573                 Normalizer.NFC,
1574                 "a\u1100",
1575                 "\u1161bcdefghijk",
1576                 "a\uac00bcdefghijk"
1577             },
1578             /* ### TODO: add more interesting cases */
1579             {
1580                 Normalizer.NFD,
1581                 "\u03B1\u0345",
1582                 "\u0C4D\uD804\uDCBA\uD834\uDD69",  // 0C4D 110BA 1D169
1583                 "\u03B1\uD834\uDD69\uD804\uDCBA\u0C4D\u0345"  // 03B1 1D169 110BA 0C4D 0345
1584             }
1585         };
1586 
1587         String left, right, expect, result;
1588         Normalizer.Mode mode;
1589         int i;
1590 
1591         /* test concatenation */
1592         for(i=0; i<cases.length; ++i) {
1593             mode = (Normalizer.Mode)cases[i][0];
1594 
1595             left=(String)cases[i][1];
1596             right=(String)cases[i][2];
1597             expect=(String)cases[i][3];
1598             {
1599                 result=Normalizer.concatenate(left, right, mode,0);
1600                 if(!result.equals(expect)) {
1601                     errln("error in Normalizer.concatenate(), cases[] failed"
1602                           +", result==expect: expected: "
1603                           + hex(expect)+" =========> got: " + hex(result));
1604                 }
1605             }
1606             {
1607                 result=Normalizer.concatenate(left.toCharArray(), right.toCharArray(), mode,0);
1608                 if(!result.equals(expect)) {
1609                     errln("error in Normalizer.concatenate(), cases[] failed"
1610                           +", result==expect: expected: "
1611                           + hex(expect)+" =========> got: " + hex(result));
1612                 }
1613             }
1614         }
1615 
1616         mode= Normalizer.NFC; // (Normalizer.Mode)cases2[0][0];
1617         char[] destination = "My resume is here".toCharArray();
1618         left = "resume";
1619         right = "re\u0301sum\u00e9 is HERE";
1620         expect = "My r\u00e9sum\u00e9 is HERE";
1621 
1622         // Concatenates 're' with '\u0301sum\u00e9 is HERE' and places the result at
1623         // position 3 of string 'My resume is here'.
1624         Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1625                                          destination, 3, 17, mode, 0);
1626         if(!String.valueOf(destination).equals(expect)) {
1627             errln("error in Normalizer.concatenate(), cases2[] failed"
1628                   +", result==expect: expected: "
1629                   + hex(expect) + " =========> got: " + hex(destination));
1630         }
1631 
1632         // Error case when result of concatenation won't fit into destination array.
1633         try {
1634             Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1635                                          destination, 3, 16, mode, 0);
1636         } catch (IndexOutOfBoundsException e) {
1637             assertTrue("Normalizer.concatenate() failed", e.getMessage().equals("14"));
1638             return;
1639         }
1640         fail("Normalizer.concatenate() tested for failure but passed");
1641     }
1642 
    // Bound passed to Random.nextInt() and used as the divisor when TestCheckFCD()
    // scales a random number to an index into its datachar[] table.
    private final int RAND_MAX = 0x7fff;
1644 
1645     @Test
TestCheckFCD()1646     public void TestCheckFCD()
1647     {
1648       char[] FAST = {0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
1649                      0x0008, 0x0009, 0x000A};
1650 
1651       char[] FALSE = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
1652                       0x02B9, 0x0314, 0x0315, 0x0316};
1653 
1654       char[] TRUE = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
1655                      0x0050, 0x0730, 0x09EE, 0x1E10};
1656 
1657       char[][] datastr= { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
1658                           {0x0061, 0x030A, 0x00E2, 0x0323, 0},
1659                           {0x0061, 0x0323, 0x00E2, 0x0323, 0},
1660                           {0x0061, 0x0323, 0x1E05, 0x0302, 0}
1661                         };
1662       Normalizer.QuickCheckResult result[] = {Normalizer.YES, Normalizer.NO, Normalizer.NO, Normalizer.YES};
1663 
1664       char[] datachar= {        0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
1665                                 0x6a,
1666                                 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
1667                                 0xea,
1668                                 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
1669                                 0x0307, 0x0308, 0x0309, 0x030a,
1670                                 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
1671                                 0x0327, 0x0328, 0x0329, 0x032a,
1672                                 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
1673                                 0x1e07, 0x1e08, 0x1e09, 0x1e0a
1674                        };
1675 
1676       int count = 0;
1677 
1678       if (Normalizer.quickCheck(FAST,0,FAST.length, Normalizer.FCD,0) != Normalizer.YES)
1679         errln("Normalizer.quickCheck(FCD) failed: expected value for fast Normalizer.quickCheck is Normalizer.YES\n");
1680       if (Normalizer.quickCheck(FALSE,0, FALSE.length,Normalizer.FCD,0) != Normalizer.NO)
1681         errln("Normalizer.quickCheck(FCD) failed: expected value for error Normalizer.quickCheck is Normalizer.NO\n");
1682       if (Normalizer.quickCheck(TRUE,0,TRUE.length,Normalizer.FCD,0) != Normalizer.YES)
1683         errln("Normalizer.quickCheck(FCD) failed: expected value for correct Normalizer.quickCheck is Normalizer.YES\n");
1684 
1685 
1686       while (count < 4)
1687       {
1688         Normalizer.QuickCheckResult fcdresult = Normalizer.quickCheck(datastr[count],0,datastr[count].length, Normalizer.FCD,0);
1689         if (result[count] != fcdresult) {
1690             errln("Normalizer.quickCheck(FCD) failed: Data set "+ count
1691                     + " expected value "+ result[count]);
1692         }
1693         count ++;
1694       }
1695 
1696       /* random checks of long strings */
1697       //srand((unsigned)time( NULL ));
1698       Random rand = createRandom(); // use test framework's random
1699 
1700       for (count = 0; count < 50; count ++)
1701       {
1702         int size = 0;
1703         Normalizer.QuickCheckResult testresult = Normalizer.YES;
1704         char[] data= new char[20];
1705         char[] norm= new char[100];
1706         char[] nfd = new char[100];
1707         int normStart = 0;
1708         int nfdsize = 0;
1709         while (size != 19) {
1710           data[size] = datachar[rand.nextInt(RAND_MAX)*50/RAND_MAX];
1711           logln("0x"+data[size]);
1712           normStart += Normalizer.normalize(data,size,size+1,
1713                                               norm,normStart,100,
1714                                               Normalizer.NFD,0);
1715           size ++;
1716         }
1717         logln("\n");
1718 
1719         nfdsize = Normalizer.normalize(data,0,size, nfd,0,nfd.length,Normalizer.NFD,0);
1720         //    nfdsize = unorm_normalize(data, size, UNORM_NFD, UCOL_IGNORE_HANGUL,
1721         //                      nfd, 100, &status);
1722         if (nfdsize != normStart || Utility.arrayRegionMatches(nfd,0, norm,0,nfdsize) ==false) {
1723           testresult = Normalizer.NO;
1724         }
1725         if (testresult == Normalizer.YES) {
1726           logln("result Normalizer.YES\n");
1727         }
1728         else {
1729           logln("result Normalizer.NO\n");
1730         }
1731 
1732         if (Normalizer.quickCheck(data,0,data.length, Normalizer.FCD,0) != testresult) {
1733           errln("Normalizer.quickCheck(FCD) failed: expected "+ testresult +" for random data: "+hex(new String(data)) );
1734         }
1735       }
1736     }
1737 
1738 
1739     // reference implementation of Normalizer::compare
ref_norm_compare(String s1, String s2, int options)1740     private int ref_norm_compare(String s1, String s2, int options) {
1741         String t1, t2,r1,r2;
1742 
1743         int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1744 
1745         if((options&Normalizer.COMPARE_IGNORE_CASE)!=0) {
1746             // NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
1747             r1 = Normalizer.decompose(s1,false,normOptions);
1748             r2 = Normalizer.decompose(s2,false,normOptions);
1749             r1 = UCharacter.foldCase(r1,options);
1750             r2 = UCharacter.foldCase(r2,options);
1751         }else{
1752             r1 = s1;
1753             r2 = s2;
1754         }
1755 
1756         t1 = Normalizer.decompose(r1, false, normOptions);
1757         t2 = Normalizer.decompose(r2, false, normOptions);
1758 
1759         if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1760             UTF16.StringComparator comp
1761                     = new UTF16.StringComparator(true, false,
1762                                      UTF16.StringComparator.FOLD_CASE_DEFAULT);
1763             return comp.compare(t1,t2);
1764         } else {
1765             return t1.compareTo(t2);
1766         }
1767 
1768     }
1769 
1770     // test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately
norm_compare(String s1, String s2, int options)1771     private int norm_compare(String s1, String s2, int options) {
1772         int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1773 
1774         if( Normalizer.YES==Normalizer.quickCheck(s1,Normalizer.FCD,normOptions) &&
1775             Normalizer.YES==Normalizer.quickCheck(s2,Normalizer.FCD,normOptions)) {
1776             options|=Normalizer.INPUT_IS_FCD;
1777         }
1778 
1779         int cmpStrings = Normalizer.compare(s1, s2, options);
1780         int cmpArrays = Normalizer.compare(
1781                 s1.toCharArray(), 0, s1.length(),
1782                 s2.toCharArray(), 0, s2.length(), options);
1783         assertEquals("compare strings == compare char arrays", cmpStrings, cmpArrays);
1784         return cmpStrings;
1785     }
1786 
1787     // reference implementation of UnicodeString::caseCompare
ref_case_compare(String s1, String s2, int options)1788     private int ref_case_compare(String s1, String s2, int options) {
1789         String t1, t2;
1790 
1791         t1=s1;
1792         t2=s2;
1793 
1794         t1 = UCharacter.foldCase(t1,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1795         t2 = UCharacter.foldCase(t2,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1796 
1797         if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1798             UTF16.StringComparator comp
1799                     = new UTF16.StringComparator(true, false,
1800                                     UTF16.StringComparator.FOLD_CASE_DEFAULT);
1801             return comp.compare(t1,t2);
1802         } else {
1803             return t1.compareTo(t2);
1804         }
1805 
1806     }
1807 
1808     // reduce an integer to -1/0/1
sign(int value)1809     private static int sign(int value) {
1810         if(value==0) {
1811             return 0;
1812         } else {
1813             return (value>>31)|1;
1814         }
1815     }
signString(int value)1816     private static String signString(int value) {
1817         if(value<0) {
1818             return "<0";
1819         } else if(value==0) {
1820             return "=0";
1821         } else /* value>0 */ {
1822             return ">0";
1823         }
1824     }
    // test Normalizer::compare and unorm_compare (thinly wrapped by the former)
    // by comparing it with its semantic equivalent
    // since we trust the pieces, this is sufficient

    // test each string with itself and each other
    // each time with all options

    // Input strings for the compare tests. The tests run every entry through
    // Utility.unescape() before use, which is why supplementary code points are
    // written here with a doubled backslash followed by U and eight hex digits.
    // The "n..m" comments give the array indexes of the entries that follow them.
    private  String strings[]=new String[]{
                // some cases from NormalizationTest.txt
                // 0..3
                "D\u031B\u0307\u0323",
                "\u1E0C\u031B\u0307",
                "D\u031B\u0323\u0307",
                "d\u031B\u0323\u0307",

                // 4..6
                "\u00E4",
                "a\u0308",
                "A\u0308",

                // Angstrom sign = A ring
                // 7..10
                "\u212B",
                "\u00C5",
                "A\u030A",
                "a\u030A",

                // 11..14
                "a\u059A\u0316\u302A\u032Fb",
                "a\u302A\u0316\u032F\u059Ab",
                "a\u302A\u0316\u032F\u059Ab",
                "A\u059A\u0316\u302A\u032Fb",

                // from ICU case folding tests
                // 15..20
                "A\u00df\u00b5\ufb03\\U0001040c\u0131",
                "ass\u03bcffi\\U00010434i",
                "\u0061\u0042\u0131\u03a3\u00df\ufb03\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udfff",
                "\u0041\u0062\u0131\u03c3\u0053\u0073\u0066\u0046\u0069\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udffd",

                //     U+d800 U+10001   see implementation comment in unorm_cmpEquivFold
                // vs. U+10000          at bottom - code point order
                // 21..22
                "\ud800\ud800\udc01",
                "\ud800\udc00",

                // other code point order tests from ustrtest.cpp
                // 23..31
                "\u20ac\ud801",
                "\u20ac\ud800\udc00",
                "\ud800",
                "\ud800\uff61",
                "\udfff",
                "\uff61\udfff",
                "\uff61\ud800\udc02",
                "\ud800\udc02",
                "\ud84d\udc56",

                // long strings, see cnormtst.c/TestNormCoverage()
                // equivalent if case-insensitive
                // 32..33
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "d\u031B\u0307\u0323",

                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\u1E0C\u031B\u0307",

                // some strings that may make a difference whether the compare function
                // case-folds or decomposes first
                // 34..41
                "\u0360\u0345\u0334",
                "\u0360\u03b9\u0334",

                "\u0360\u1f80\u0334",
                "\u0360\u03b1\u0313\u03b9\u0334",

                "\u0360\u1ffc\u0334",
                "\u0360\u03c9\u03b9\u0334",

                "a\u0360\u0345\u0360\u0345b",
                "a\u0345\u0360\u0345\u0360b",

                // interesting cases for canonical caseless match with turkic i handling
                // 42..43
                "\u00cc",
                "\u0069\u0300",

                // strings with post-Unicode 3.2 normalization or normalization corrections
                // 44..45
                "\u00e4\u193b\\U0002f868",
                "\u0061\u193b\u0308\u36fc",


    };
1940 
1941     // all combinations of options
1942     // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions
1943     final class Temp {
1944         int options;
1945         String name;
Temp(int opt,String str)1946         public Temp(int opt,String str){
1947             options =opt;
1948             name = str;
1949         }
1950 
1951     }
    // set UNORM_UNICODE_3_2 in one additional combination

    // Option sets that the compare tests loop over; each entry pairs the option
    // bits with the label printed in failure messages.
    private Temp[] opt = new Temp[]{
                    new Temp(0,"default"),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER, "code point order" ),
                    new Temp(Normalizer.COMPARE_IGNORE_CASE, "ignore case" ),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE, "code point order & ignore case" ),
                    new Temp(Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & special i"),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "code point order & ignore case & special i"),
                    // normalization options live above COMPARE_NORM_OPTIONS_SHIFT in the compare options
                    new Temp(Normalizer.UNICODE_3_2 << Normalizer.COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2")
            };
1963 
1964 
1965     @Test
TestCompareDebug()1966     public void TestCompareDebug(){
1967 
1968         String[] s = new String[100]; // at least as many items as in strings[] !
1969 
1970 
1971         int i, j, k, count=strings.length;
1972         int result, refResult;
1973 
1974         // create the UnicodeStrings
1975         for(i=0; i<count; ++i) {
1976             s[i]=Utility.unescape(strings[i]);
1977         }
1978         UTF16.StringComparator comp = new UTF16.StringComparator(true, false,
1979                                      UTF16.StringComparator.FOLD_CASE_DEFAULT);
1980         // test them each with each other
1981 
1982         i = 42;
1983         j = 43;
1984         k = 2;
1985         // test Normalizer::compare
1986         result=norm_compare(s[i], s[j], opt[k].options);
1987         refResult=ref_norm_compare(s[i], s[j], opt[k].options);
1988         if(sign(result)!=sign(refResult)) {
1989             errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
1990         }
1991 
1992         // test UnicodeString::caseCompare - same internal implementation function
1993          if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
1994         //    result=s[i]. (s[j], opt[k].options);
1995             if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
1996             {
1997                 comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
1998             }
1999             else {
2000                 comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2001             }
2002 
2003             result=comp.compare(s[i],s[j]);
2004             refResult=ref_case_compare(s[i], s[j], opt[k].options);
2005             if(sign(result)!=sign(refResult)) {
2006                       errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2007                             }
2008         }
2009         String value1 = "\u00dater\u00fd";
2010         String value2 = "\u00fater\u00fd";
2011         if(Normalizer.compare(value1,value2,0)!=0){
2012             if(Normalizer.compare(value1,value2,Normalizer.COMPARE_IGNORE_CASE)==0){
2013 
2014             }
2015         }
2016     }
2017 
2018     @Test
TestCompare()2019     public void TestCompare() {
2020 
2021         String[] s = new String[100]; // at least as many items as in strings[] !
2022 
2023         int i, j, k, count=strings.length;
2024         int result, refResult;
2025 
2026         // create the UnicodeStrings
2027         for(i=0; i<count; ++i) {
2028             s[i]=Utility.unescape(strings[i]);
2029         }
2030         UTF16.StringComparator comp = new UTF16.StringComparator();
2031         // test them each with each other
2032         for(i=0; i<count; ++i) {
2033             for(j=i; j<count; ++j) {
2034                 for(k=0; k<opt.length; ++k) {
2035                     // test Normalizer::compare
2036                     result=norm_compare(s[i], s[j], opt[k].options);
2037                     refResult=ref_norm_compare(s[i], s[j], opt[k].options);
2038                     if(sign(result)!=sign(refResult)) {
2039                         errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2040                     }
2041 
2042                     // test UnicodeString::caseCompare - same internal implementation function
2043                      if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
2044                         //    result=s[i]. (s[j], opt[k].options);
2045                         if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2046                         {
2047                             comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2048                         }
2049                         else {
2050                             comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2051                         }
2052 
2053                         comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2054                         // result=comp.caseCompare(s[i],s[j], opt[k].options);
2055                         result=comp.compare(s[i],s[j]);
2056                         refResult=ref_case_compare(s[i], s[j], opt[k].options);
2057                         if(sign(result)!=sign(refResult)) {
2058                                   errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2059                                          }
2060                     }
2061                 }
2062             }
2063         }
2064 
2065         // test cases with i and I to make sure Turkic works
2066         char[] iI= new char[]{ 0x49, 0x69, 0x130, 0x131 };
2067         UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
2068         Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
2069         nfcImpl.ensureCanonIterData();
2070 
2071         String s1, s2;
2072 
2073         // collect all sets into one for contiguous output
2074         for(i=0; i<iI.length; ++i) {
2075             if(nfcImpl.getCanonStartSet(iI[i], iSet)) {
2076                 set.addAll(iSet);
2077             }
2078         }
2079 
2080         // test all of these precomposed characters
2081         Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
2082         UnicodeSetIterator it = new UnicodeSetIterator(set);
2083         int c;
2084         while(it.next() && (c=it.codepoint)!=UnicodeSetIterator.IS_STRING) {
2085             s1 = UTF16.valueOf(c);
2086             s2 = nfcNorm2.getDecomposition(c);
2087             for(k=0; k<opt.length; ++k) {
2088                 // test Normalizer::compare
2089 
2090                 result= norm_compare(s1, s2, opt[k].options);
2091                 refResult=ref_norm_compare(s1, s2, opt[k].options);
2092                 if(sign(result)!=sign(refResult)) {
2093                     errln("Normalizer.compare(U+"+hex(c)+" with its NFD, "+opt[k].name+")"
2094                           + signString(result)+" should be "+signString(refResult));
2095                 }
2096 
2097                 // test UnicodeString::caseCompare - same internal implementation function
2098                 if((opt[k].options & Normalizer.COMPARE_IGNORE_CASE)>0) {
2099                      if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2100                     {
2101                         comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2102                     }
2103                     else {
2104                         comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2105                     }
2106 
2107                     comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2108 
2109                     result=comp.compare(s1,s2);
2110                     refResult=ref_case_compare(s1, s2, opt[k].options);
2111                     if(sign(result)!=sign(refResult)) {
2112                         errln("UTF16.compare(U+"+hex(c)+" with its NFD, "
2113                               +opt[k].name+")"+signString(result) +" should be "+signString(refResult));
2114                     }
2115                 }
2116             }
2117         }
2118 
2119         // test getDecomposition() for some characters that do not decompose
2120         if( nfcNorm2.getDecomposition(0x20)!=null ||
2121             nfcNorm2.getDecomposition(0x4e00)!=null ||
2122             nfcNorm2.getDecomposition(0x20002)!=null
2123         ) {
2124             errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
2125         }
2126 
2127         // test getRawDecomposition() for some characters that do not decompose
2128         if( nfcNorm2.getRawDecomposition(0x20)!=null ||
2129             nfcNorm2.getRawDecomposition(0x4e00)!=null ||
2130             nfcNorm2.getRawDecomposition(0x20002)!=null
2131         ) {
2132             errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
2133         }
2134 
2135         // test composePair() for some pairs of characters that do not compose
2136         if( nfcNorm2.composePair(0x20, 0x301)>=0 ||
2137             nfcNorm2.composePair(0x61, 0x305)>=0 ||
2138             nfcNorm2.composePair(0x1100, 0x1160)>=0 ||
2139             nfcNorm2.composePair(0xac00, 0x11a7)>=0
2140         ) {
2141             errln("NFC.composePair() incorrectly composes some pairs of characters");
2142         }
2143 
2144         // test FilteredNormalizer2.getDecomposition()
2145         UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff]");
2146         FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2147         if(fn2.getDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
2148             errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
2149         }
2150 
2151         // test FilteredNormalizer2.getRawDecomposition()
2152         if(fn2.getRawDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
2153             errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
2154         }
2155 
2156         // test FilteredNormalizer2::composePair()
2157         if( 0x100!=fn2.composePair(0x41, 0x304) ||
2158             fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08
2159         ) {
2160             errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
2161         }
2162     }
2163 
2164     // verify that case-folding does not un-FCD strings
countFoldFCDExceptions(int foldingOptions)2165     int countFoldFCDExceptions(int foldingOptions) {
2166         String s, d;
2167         int c;
2168         int count;
2169         int/*unsigned*/ cc, trailCC, foldCC, foldTrailCC;
2170         Normalizer.QuickCheckResult qcResult;
2171         int category;
2172         boolean isNFD;
2173 
2174 
2175         logln("Test if case folding may un-FCD a string (folding options 0x)"+hex(foldingOptions));
2176 
2177         count=0;
2178         for(c=0; c<=0x10ffff; ++c) {
2179             category=UCharacter.getType(c);
2180             if(category==UCharacterCategory.UNASSIGNED) {
2181                 continue; // skip unassigned code points
2182             }
2183             if(c==0xac00) {
2184                 c=0xd7a3; // skip Hangul - no case folding there
2185                 continue;
2186             }
2187             // skip Han blocks - no case folding there either
2188             if(c==0x3400) {
2189                 c=0x4db5;
2190                 continue;
2191             }
2192             if(c==0x4e00) {
2193                 c=0x9fa5;
2194                 continue;
2195             }
2196             if(c==0x20000) {
2197                 c=0x2a6d6;
2198                 continue;
2199             }
2200 
2201             s= UTF16.valueOf(c);
2202 
2203             // get leading and trailing cc for c
2204             d= Normalizer.decompose(s,false);
2205             isNFD= s==d;
2206             cc=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2207             trailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2208 
2209             // get leading and trailing cc for the case-folding of c
2210             UCharacter.foldCase(s,(foldingOptions==0));
2211             d = Normalizer.decompose(s, false);
2212             foldCC=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2213             foldTrailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2214 
2215             qcResult=Normalizer.quickCheck(s, Normalizer.FCD,0);
2216 
2217 
2218             // bad:
2219             // - character maps to empty string: adjacent characters may then need reordering
2220             // - folding has different leading/trailing cc's, and they don't become just 0
2221             // - folding itself is not FCD
2222             if( qcResult!=Normalizer.YES ||
2223                 s.length()==0 ||
2224                 (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0)
2225             ) {
2226                 ++count;
2227                 errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2228                 //errln("  cc %02x trailCC %02x    foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x   quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult);
2229                 continue;
2230             }
2231 
2232             // also bad:
2233             // if a code point is in NFD but its case folding is not, then
2234             // unorm_compare will also fail
2235             if(isNFD && Normalizer.YES!=Normalizer.quickCheck(s, Normalizer.NFD,0)) {
2236                 ++count;
2237                 errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2238             }
2239         }
2240 
2241         logln("There are "+hex(count)+" code points for which case-folding may un-FCD a string (folding options"+foldingOptions+"x)" );
2242         return count;
2243     }
2244 
2245     @Test
TestFindFoldFCDExceptions()2246     public void TestFindFoldFCDExceptions() {
2247         int count;
2248 
2249         count=countFoldFCDExceptions(0);
2250         count+=countFoldFCDExceptions(Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I);
2251         if(count>0) {
2252             //*
2253             //* If case-folding un-FCDs any strings, then unorm_compare() must be
2254             //* re-implemented.
2255             //* It currently assumes that one can check for FCD then case-fold
2256             //* and then still have FCD strings for raw decomposition without reordering.
2257             //*
2258             errln("error: There are "+count+" code points for which case-folding"+
2259                   " may un-FCD a string for all folding options.\n See comment"+
2260                   " in BasicNormalizerTest::FindFoldFCDExceptions()!");
2261         }
2262     }
2263 
2264     @Test
TestCombiningMarks()2265     public void TestCombiningMarks(){
2266         String src = "\u0f71\u0f72\u0f73\u0f74\u0f75";
2267         String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
2268         String result = Normalizer.decompose(src,false);
2269         if(!expected.equals(result)){
2270             errln("Reordering of combining marks failed. Expected: "+Utility.hex(expected)+" Got: "+ Utility.hex(result));
2271         }
2272     }
2273 
2274     /*
2275      * Re-enable this test when UTC fixes UAX 21
2276     @Test
2277     public void TestUAX21Failure(){
2278         final String[][] cases = new String[][]{
2279                 {"\u0061\u0345\u0360\u0345\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2280                 {"\u0061\u0345\u0345\u0360\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2281                 {"\u0061\u0345\u0360\u0362\u0360\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2282                 {"\u0061\u0360\u0345\u0360\u0362\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2283                 {"\u0061\u0345\u0360\u0362\u0361\u0062", "\u0061\u0362\u0360\u0361\u0345\u0062"},
2284                 {"\u0061\u0361\u0345\u0360\u0362\u0062", "\u0061\u0362\u0361\u0360\u0345\u0062"},
2285         };
2286         for(int i = 0; i< cases.length; i++){
2287             String s1 =cases[0][0];
2288             String s2 = cases[0][1];
2289             if( (Normalizer.compare(s1,s2,Normalizer.FOLD_CASE_DEFAULT ==0)//case sensitive compare
2290                 &&
2291                 (Normalizer.compare(s1,s2,Normalizer.COMPARE_IGNORE_CASE)!=0)){
2292                 errln("Normalizer.compare() failed for s1: "
2293                         + Utility.hex(s1) +" s2: " + Utility.hex(s2));
2294             }
2295         }
2296     }
2297     */
2298     @Test
TestFCNFKCClosure()2299     public void TestFCNFKCClosure() {
2300         final class TestStruct{
2301             int c;
2302             String s;
2303             TestStruct(int cp, String src){
2304                 c=cp;
2305                 s=src;
2306             }
2307         }
2308 
2309         TestStruct[] tests= new TestStruct[]{
2310             new TestStruct( 0x00C4, "" ),
2311             new TestStruct( 0x00E4, "" ),
2312             new TestStruct( 0x037A, "\u0020\u03B9" ),
2313             new TestStruct( 0x03D2, "\u03C5" ),
2314             new TestStruct( 0x20A8, "\u0072\u0073" ) ,
2315             new TestStruct( 0x210B, "\u0068" ),
2316             new TestStruct( 0x210C, "\u0068" ),
2317             new TestStruct( 0x2121, "\u0074\u0065\u006C" ),
2318             new TestStruct( 0x2122, "\u0074\u006D" ),
2319             new TestStruct( 0x2128, "\u007A" ),
2320             new TestStruct( 0x1D5DB,"\u0068" ),
2321             new TestStruct( 0x1D5ED,"\u007A" ),
2322             new TestStruct( 0x0061, "" )
2323         };
2324 
2325 
2326         for(int i = 0; i < tests.length; ++ i) {
2327             String result=Normalizer.getFC_NFKC_Closure(tests[i].c);
2328             if(!result.equals(new String(tests[i].s))) {
2329                 errln("getFC_NFKC_Closure(U+"+Integer.toHexString(tests[i].c)+") is wrong");
2330             }
2331         }
2332 
2333         /* error handling */
2334 
2335         int length=Normalizer.getFC_NFKC_Closure(0x5c, null);
2336         if(length!=0){
2337             errln("getFC_NFKC_Closure did not perform error handling correctly");
2338         }
2339     }
2340     @Test
TestBugJ2324()2341     public void TestBugJ2324(){
2342        /* String[] input = new String[]{
2343                             //"\u30FD\u3099",
2344                             "\u30FA\u309A",
2345                             "\u30FB\u309A",
2346                             "\u30FC\u309A",
2347                             "\u30FE\u309A",
2348                             "\u30FD\u309A",
2349 
2350         };*/
2351         String troublesome = "\u309A";
2352         for(int i=0x3000; i<0x3100;i++){
2353             String input = ((char)i)+troublesome;
2354             try{
2355               /*  String result =*/ Normalizer.compose(input,false);
2356             }catch(IndexOutOfBoundsException e){
2357                 errln("compose() failed for input: " + Utility.hex(input) + " Exception: " + e.toString());
2358             }
2359         }
2360 
2361     }
2362 
2363     static final int D = 0, C = 1, KD= 2, KC = 3, FCD=4, NONE=5;
2364 
initSkippables(UnicodeSet[] skipSets)2365     private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
2366         skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
2367         skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2368         skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
2369         skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2370 
2371         // Remove from the NFC and NFKC sets all those characters that change
2372         // when a back-combining character is added.
2373         // First, get all of the back-combining characters and their combining classes.
2374         UnicodeSet combineBack=new UnicodeSet("[:NFC_QC=Maybe:]");
2375         int numCombineBack=combineBack.size();
2376         int[] combineBackCharsAndCc=new int[numCombineBack*2];
2377         UnicodeSetIterator iter=new UnicodeSetIterator(combineBack);
2378         for(int i=0; i<numCombineBack; ++i) {
2379             iter.next();
2380             int c=iter.codepoint;
2381             combineBackCharsAndCc[2*i]=c;
2382             combineBackCharsAndCc[2*i+1]=UCharacter.getCombiningClass(c);
2383         }
2384 
2385         // We need not look at control codes, Han characters nor Hangul LVT syllables because they
2386         // do not combine forward. LV syllables are already removed.
2387         UnicodeSet notInteresting=new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
2388         UnicodeSet unsure=((UnicodeSet)(skipSets[C].clone())).removeAll(notInteresting);
2389         // System.out.format("unsure.size()=%d\n", unsure.size());
2390 
2391         // For each character about which we are unsure, see if it changes when we add
2392         // one of the back-combining characters.
2393         Normalizer2 norm2=Normalizer2.getNFCInstance();
2394         StringBuilder s=new StringBuilder();
2395         iter.reset(unsure);
2396         while(iter.next()) {
2397             int c=iter.codepoint;
2398             s.delete(0, 0x7fffffff).appendCodePoint(c);
2399             int cLength=s.length();
2400             int tccc=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
2401             for(int i=0; i<numCombineBack; ++i) {
2402                 // If c's decomposition ends with a character with non-zero combining class, then
2403                 // c can only change if it combines with a character with a non-zero combining class.
2404                 int cc2=combineBackCharsAndCc[2*i+1];
2405                 if(tccc==0 || cc2!=0) {
2406                     int c2=combineBackCharsAndCc[2*i];
2407                     s.appendCodePoint(c2);
2408                     if(!norm2.isNormalized(s)) {
2409                         // System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
2410                         skipSets[C].remove(c);
2411                         skipSets[KC].remove(c);
2412                         break;
2413                     }
2414                     s.delete(cLength, 0x7fffffff);
2415                 }
2416             }
2417         }
2418         return skipSets;
2419     }
2420 
2421     private static String[] kModeStrings = {
2422         "D", "C", "KD", "KC"
2423     };
2424 
2425     @Test
TestSkippable()2426     public void TestSkippable() {
2427         UnicodeSet[] skipSets = new UnicodeSet[] {
2428             new UnicodeSet(), //NFD
2429             new UnicodeSet(), //NFC
2430             new UnicodeSet(), //NFKD
2431             new UnicodeSet()  //NFKC
2432         };
2433         UnicodeSet[] expectSets = new UnicodeSet[] {
2434             new UnicodeSet(),
2435             new UnicodeSet(),
2436             new UnicodeSet(),
2437             new UnicodeSet()
2438         };
2439         StringBuilder s, pattern;
2440 
2441         // build NF*Skippable sets from runtime data
2442         skipSets[D].applyPattern("[:NFD_Inert:]");
2443         skipSets[C].applyPattern("[:NFC_Inert:]");
2444         skipSets[KD].applyPattern("[:NFKD_Inert:]");
2445         skipSets[KC].applyPattern("[:NFKC_Inert:]");
2446 
2447         expectSets = initSkippables(expectSets);
2448         if(expectSets[D].contains(0x0350)){
2449             errln("expectSets[D] contains 0x0350");
2450         }
2451         for(int i=0; i<expectSets.length; ++i) {
2452             if(!skipSets[i].equals(expectSets[i])) {
2453                 String ms = kModeStrings[i];
2454                 errln("error: TestSkippable skipSets["+ms+"]!=expectedSets["+ms+"]\n");
2455                 // Note: This used to depend on hardcoded UnicodeSet patterns generated by
2456                 // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
2457                 // running com.ibm.text.UCD.Main with the option NFSkippable.
2458                 // Since ICU 4.6/Unicode 6, we are generating the
2459                 // expectSets ourselves in initSkippables().
2460 
2461                 s=new StringBuilder();
2462 
2463                 s.append("\n\nskip=       ");
2464                 s.append(skipSets[i].toPattern(true));
2465                 s.append("\n\n");
2466 
2467                 s.append("skip-expect=");
2468                 pattern = new StringBuilder(((UnicodeSet)skipSets[i].clone()).removeAll(expectSets[i]).toPattern(true));
2469                 s.append(pattern);
2470 
2471                 pattern.delete(0,pattern.length());
2472                 s.append("\n\nexpect-skip=");
2473                 pattern = new StringBuilder(((UnicodeSet)expectSets[i].clone()).removeAll(skipSets[i]).toPattern(true));
2474                 s.append(pattern);
2475                 s.append("\n\n");
2476 
2477                 pattern.delete(0,pattern.length());
2478                 s.append("\n\nintersection(expect,skip)=");
2479                 UnicodeSet intersection  = ((UnicodeSet) expectSets[i].clone()).retainAll(skipSets[i]);
2480                 pattern = new StringBuilder(intersection.toPattern(true));
2481                 s.append(pattern);
2482                 // Special: test coverage for append(char).
2483                 s.append('\n');
2484                 s.append('\n');
2485 
2486                 errln(s.toString());
2487             }
2488         }
2489     }
2490 
2491     @Test
TestBugJ2068()2492     public void TestBugJ2068(){
2493         String sample = "The quick brown fox jumped over the lazy dog";
2494         UCharacterIterator text = UCharacterIterator.getInstance(sample);
2495         Normalizer norm = new Normalizer(text,Normalizer.NFC,0);
2496         text.setIndex(4);
2497         if(text.current() == norm.current()){
2498             errln("Normalizer is not cloning the UCharacterIterator");
2499         }
2500      }
2501     @Test
TestGetCombiningClass()2502      public void TestGetCombiningClass(){
2503         for(int i=0;i<0x10FFFF;i++){
2504             int cc = UCharacter.getCombiningClass(i);
2505             if(0xD800<= i && i<=0xDFFF && cc >0 ){
2506                 cc = UCharacter.getCombiningClass(i);
2507                 errln("CC: "+ cc + " for codepoint: " +Utility.hex(i,8));
2508             }
2509         }
2510     }
2511 
2512     @Test
TestSerializedSet()2513     public void TestSerializedSet(){
2514         USerializedSet sset=new USerializedSet();
2515         UnicodeSet set = new UnicodeSet();
2516         int start, end;
2517 
2518         char[] serialized = {
2519             0x8007,  // length
2520             3,  // bmpLength
2521             0xc0, 0xfe, 0xfffc,
2522             1, 9, 0x10, 0xfffc
2523         };
2524         sset.getSet(serialized, 0);
2525 
2526         // collect all sets into one for contiguous output
2527         int[] startEnd = new int[2];
2528         int count=sset.countRanges();
2529         for(int j=0; j<count; ++j) {
2530             sset.getRange(j, startEnd);
2531             set.add(startEnd[0], startEnd[1]);
2532         }
2533 
2534         // test all of these characters
2535         UnicodeSetIterator it = new UnicodeSetIterator(set);
2536         while(it.nextRange() && it.codepoint!=UnicodeSetIterator.IS_STRING) {
2537             start=it.codepoint;
2538             end=it.codepointEnd;
2539             while(start<=end) {
2540                 if(!sset.contains(start)){
2541                     errln("USerializedSet.contains failed for "+Utility.hex(start,8));
2542                 }
2543                 ++start;
2544             }
2545         }
2546     }
2547 
2548     @Test
TestReturnFailure()2549     public void TestReturnFailure(){
2550         char[] term = {'r','\u00e9','s','u','m','\u00e9' };
2551         char[] decomposed_term = new char[10 + term.length + 2];
2552         int rc = Normalizer.decompose(term,0,term.length, decomposed_term,0,decomposed_term.length,true, 0);
2553         int rc1 = Normalizer.decompose(term,0,term.length, decomposed_term,10,decomposed_term.length,true, 0);
2554         if(rc!=rc1){
2555             errln("Normalizer decompose did not return correct length");
2556         }
2557     }
2558 
2559     private final static class TestCompositionCase {
2560         public Normalizer.Mode mode;
2561         public int options;
2562         public String input, expect;
TestCompositionCase(Normalizer.Mode mode, int options, String input, String expect)2563         TestCompositionCase(Normalizer.Mode mode, int options, String input, String expect) {
2564             this.mode=mode;
2565             this.options=options;
2566             this.input=input;
2567             this.expect=expect;
2568         }
2569     }
2570 
2571     @Test
TestComposition()2572     public void TestComposition() {
2573         final TestCompositionCase cases[]=new TestCompositionCase[]{
2574             /*
2575              * special cases for UAX #15 bug
2576              * see Unicode Corrigendum #5: Normalization Idempotency
2577              * at http://unicode.org/versions/corrigendum5.html
2578              * (was Public Review Issue #29)
2579              */
2580             new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327",      "\u1100\u0300\u1161\u0327"),
2581             new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327\u11a8","\u1100\u0300\u1161\u0327\u11a8"),
2582             new TestCompositionCase(Normalizer.NFC, 0, "\uac00\u0300\u0327\u11a8",      "\uac00\u0327\u0300\u11a8"),
2583             new TestCompositionCase(Normalizer.NFC, 0, "\u0b47\u0300\u0b3e",            "\u0b47\u0300\u0b3e"),
2584 
2585             /* TODO: add test cases for UNORM_FCC here (j2151) */
2586         };
2587 
2588         String output;
2589         int i;
2590 
2591         for(i=0; i<cases.length; ++i) {
2592             output=Normalizer.normalize(cases[i].input, cases[i].mode, cases[i].options);
2593             if(!output.equals(cases[i].expect)) {
2594                 errln("unexpected result for case "+i);
2595             }
2596         }
2597     }
2598 
2599     @Test
TestGetDecomposition()2600     public void TestGetDecomposition() {
2601         Normalizer2 n2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2602         String decomp=n2.getDecomposition(0x20);
2603         assertEquals("fcc.getDecomposition(space) failed", null, decomp);
2604         decomp=n2.getDecomposition(0xe4);
2605         assertEquals("fcc.getDecomposition(a-umlaut) failed", "a\u0308", decomp);
2606         decomp=n2.getDecomposition(0xac01);
2607         assertEquals("fcc.getDecomposition(Hangul syllable U+AC01) failed", "\u1100\u1161\u11a8", decomp);
2608     }
2609 
2610     @Test
TestGetRawDecomposition()2611     public void TestGetRawDecomposition() {
2612         Normalizer2 n2=Normalizer2.getNFKCInstance();
2613         /*
2614          * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
2615          * without recursive decomposition.
2616          */
2617 
2618         String decomp=n2.getRawDecomposition(0x20);
2619         assertEquals("nfkc.getRawDecomposition(space) failed", null, decomp);
2620         decomp=n2.getRawDecomposition(0xe4);
2621         assertEquals("nfkc.getRawDecomposition(a-umlaut) failed", "a\u0308", decomp);
2622         /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
2623         decomp=n2.getRawDecomposition(0x1e08);
2624         assertEquals("nfkc.getRawDecomposition(c-cedilla-acute) failed", "\u00c7\u0301", decomp);
2625         /* U+212B ANGSTROM SIGN */
2626         decomp=n2.getRawDecomposition(0x212b);
2627         assertEquals("nfkc.getRawDecomposition(angstrom sign) failed", "\u00c5", decomp);
2628         decomp=n2.getRawDecomposition(0xac00);
2629         assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC00) failed", "\u1100\u1161", decomp);
2630         /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
2631         decomp=n2.getRawDecomposition(0xac01);
2632         assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC01) failed", "\uac00\u11a8", decomp);
2633     }
2634 
2635     @Test
TestCustomComp()2636     public void TestCustomComp() {
2637         String [][] pairs={
2638             // ICU 63 normalization with CodePointTrie requires inert surrogate code points.
2639             // { "\\uD801\\uE000\\uDFFE", "" },
2640             // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2641             // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2642             { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" },
2643             { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" },
2644             { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" },
2645 
2646             { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" },
2647             { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2648             { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2649             { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2650             { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2651         };
2652         Normalizer2 customNorm2;
2653         customNorm2=
2654             Normalizer2.getInstance(
2655                 BasicTest.class.getResourceAsStream("/ohos/global/icu/dev/data/testdata/testnorm.nrm"),
2656                 "testnorm",
2657                 Normalizer2.Mode.COMPOSE);
2658         for(int i=0; i<pairs.length; ++i) {
2659             String[] pair=pairs[i];
2660             String input=Utility.unescape(pair[0]);
2661             String expected=Utility.unescape(pair[1]);
2662             String result=customNorm2.normalize(input);
2663             if(!result.equals(expected)) {
2664                 errln("custom compose Normalizer2 did not normalize input "+i+" as expected");
2665             }
2666         }
2667     }
2668 
2669     @Test
TestCustomFCC()2670     public void TestCustomFCC() {
2671         String[][] pairs={
2672             // ICU 63 normalization with CodePointTrie requires inert surrogate code points.
2673             // { "\\uD801\\uE000\\uDFFE", "" },
2674             // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2675             // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2676             { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" },
2677             { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" },
2678             { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" },
2679 
2680             // The following expected result is different from CustomComp
2681             // because of only-contiguous composition.
2682             { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" },
2683             { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2684             { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2685             { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2686             { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2687         };
2688         Normalizer2 customNorm2;
2689         customNorm2=
2690             Normalizer2.getInstance(
2691                 BasicTest.class.getResourceAsStream("/ohos/global/icu/dev/data/testdata/testnorm.nrm"),
2692                 "testnorm",
2693                 Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2694         for(int i=0; i<pairs.length; ++i) {
2695             String[] pair=pairs[i];
2696             String input=Utility.unescape(pair[0]);
2697             String expected=Utility.unescape(pair[1]);
2698             String result=customNorm2.normalize(input);
2699             if(!result.equals(expected)) {
2700                 errln("custom FCC Normalizer2 did not normalize input "+i+" as expected");
2701             }
2702         }
2703     }
2704 
2705     @Test
TestCanonIterData()2706     public void TestCanonIterData() {
2707         // For now, just a regression test.
2708         Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl.ensureCanonIterData();
2709         // U+0FB5 TIBETAN SUBJOINED LETTER SSA is the trailing character
2710         // in some decomposition mappings where there is a composition exclusion.
2711         // In fact, U+0FB5 is normalization-inert (NFC_QC=Yes, NFD_QC=Yes, ccc=0)
2712         // but it is not a segment starter because it occurs in a decomposition mapping.
2713         if(impl.isCanonSegmentStarter(0xfb5)) {
2714             errln("isCanonSegmentStarter(U+0fb5)=true is wrong");
2715         }
2716         // For [:Segment_Starter:] to work right, not just the property function has to work right,
2717         // UnicodeSet also needs a correct range starts set.
2718         UnicodeSet segStarters=new UnicodeSet("[:Segment_Starter:]").freeze();
2719         if(segStarters.contains(0xfb5)) {
2720             errln("[:Segment_Starter:].contains(U+0fb5)=true is wrong");
2721         }
2722         // Try characters up to Kana and miscellaneous CJK but below Han (for expediency).
2723         for(int c=0; c<=0x33ff; ++c) {
2724             boolean isStarter=impl.isCanonSegmentStarter(c);
2725             boolean isContained=segStarters.contains(c);
2726             if(isStarter!=isContained) {
2727                 errln(String.format(
2728                         "discrepancy: isCanonSegmentStarter(U+%04x)=%5b != " +
2729                         "[:Segment_Starter:].contains(same)",
2730                         c, isStarter));
2731             }
2732         }
2733     }
2734 
2735     @Test
TestFilteredNormalizer2()2736     public void TestFilteredNormalizer2() {
2737         Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2738         UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2739         FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2740         int c;
2741         for(c=0; c<=0x3ff; ++c) {
2742             int expectedCC= filter.contains(c) ? nfcNorm2.getCombiningClass(c) : 0;
2743             int cc=fn2.getCombiningClass(c);
2744             assertEquals(
2745                     "FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+"+hex(c)+
2746                     ")==filtered NFC.getCC()",
2747                     expectedCC, cc);
2748         }
2749 
2750         // More coverage.
2751         StringBuilder sb=new StringBuilder();
2752         assertEquals("filtered normalize()", "ää\u0304",
2753                 fn2.normalize("a\u0308ä\u0304", (Appendable)sb).toString());
2754         assertTrue("filtered hasBoundaryAfter()", fn2.hasBoundaryAfter('ä'));
2755         assertTrue("filtered isInert()", fn2.isInert(0x0313));
2756     }
2757 
2758     @Test
TestFilteredAppend()2759     public void TestFilteredAppend() {
2760         Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2761         UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2762         FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2763 
2764         // Append two strings that each contain a character outside the filter set.
2765         StringBuilder sb = new StringBuilder("a\u0313a");
2766         String second = "\u0301\u0313";
2767         assertEquals("append()", "a\u0313á\u0313", fn2.append(sb, second).toString());
2768 
2769         // Same, and also normalize the second string.
2770         sb.replace(0, 0x7fffffff, "a\u0313a");
2771         assertEquals(
2772             "normalizeSecondAndAppend()",
2773             "a\u0313á\u0313", fn2.normalizeSecondAndAppend(sb, second).toString());
2774 
2775         // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
2776         assertEquals("normalize()", "a\u0313á\u0313", fn2.normalize("a\u0313a\u0301\u0313"));
2777     }
2778 
2779     @Test
TestGetEasyToUseInstance()2780     public void TestGetEasyToUseInstance() {
2781         // Test input string:
2782         // U+00A0 -> <noBreak> 0020
2783         // U+00C7 0301 = 1E08 = 0043 0327 0301
2784         String in="\u00A0\u00C7\u0301";
2785         Normalizer2 n2=Normalizer2.getNFCInstance();
2786         String out=n2.normalize(in);
2787         assertEquals(
2788                 "getNFCInstance() did not return an NFC instance " +
2789                 "(normalizes to " + prettify(out) + ')',
2790                 "\u00A0\u1E08", out);
2791 
2792         n2=Normalizer2.getNFDInstance();
2793         out=n2.normalize(in);
2794         assertEquals(
2795                 "getNFDInstance() did not return an NFD instance " +
2796                 "(normalizes to " + prettify(out) + ')',
2797                 "\u00A0C\u0327\u0301", out);
2798 
2799         n2=Normalizer2.getNFKCInstance();
2800         out=n2.normalize(in);
2801         assertEquals(
2802                 "getNFKCInstance() did not return an NFKC instance " +
2803                 "(normalizes to " + prettify(out) + ')',
2804                 " \u1E08", out);
2805 
2806         n2=Normalizer2.getNFKDInstance();
2807         out=n2.normalize(in);
2808         assertEquals(
2809                 "getNFKDInstance() did not return an NFKD instance " +
2810                 "(normalizes to " + prettify(out) + ')',
2811                 " C\u0327\u0301", out);
2812 
2813         n2=Normalizer2.getNFKCCasefoldInstance();
2814         out=n2.normalize(in);
2815         assertEquals(
2816                 "getNFKCCasefoldInstance() did not return an NFKC_Casefold instance " +
2817                 "(normalizes to " + prettify(out) + ')',
2818                 " \u1E09", out);
2819     }
2820 
2821     @Test
TestLowMappingToEmpty_D()2822     public void TestLowMappingToEmpty_D() {
2823         Normalizer2 n2 = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.DECOMPOSE);
2824         checkLowMappingToEmpty(n2);
2825 
2826         String sh = "\u00AD";
2827         assertFalse("soft hyphen is not normalized", n2.isNormalized(sh));
2828         String result = n2.normalize(sh);
2829         assertTrue("soft hyphen normalizes to empty", result.isEmpty());
2830         assertEquals("soft hyphen QC=No", Normalizer.NO, n2.quickCheck(sh));
2831         assertEquals("soft hyphen spanQuickCheckYes", 0, n2.spanQuickCheckYes(sh));
2832 
2833         String s = "\u00ADÄ\u00AD\u0323";
2834         result = n2.normalize(s);
2835         assertEquals("normalize string with soft hyphens", "a\u0323\u0308", result);
2836     }
2837 
2838     @Test
TestLowMappingToEmpty_FCD()2839     public void TestLowMappingToEmpty_FCD() {
2840         Normalizer2 n2 = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.FCD);
2841         checkLowMappingToEmpty(n2);
2842 
2843         String sh = "\u00AD";
2844         assertTrue("soft hyphen is FCD", n2.isNormalized(sh));
2845 
2846         String s = "\u00ADÄ\u00AD\u0323";
2847         String result = n2.normalize(s);
2848         assertEquals("normalize string with soft hyphens", "\u00ADa\u0323\u0308", result);
2849     }
2850 
checkLowMappingToEmpty(Normalizer2 n2)2851     private void checkLowMappingToEmpty(Normalizer2 n2) {
2852         String mapping = n2.getDecomposition(0xad);
2853         assertNotNull("getDecomposition(soft hyphen)", mapping);
2854         assertTrue("soft hyphen maps to empty", mapping.isEmpty());
2855         assertFalse("soft hyphen has no boundary before", n2.hasBoundaryBefore(0xad));
2856         assertFalse("soft hyphen has no boundary after", n2.hasBoundaryAfter(0xad));
2857         assertFalse("soft hyphen is not inert", n2.isInert(0xad));
2858     }
2859 
2860     @Test
TestNormalizeIllFormedText()2861     public void TestNormalizeIllFormedText() {
2862         Normalizer2 nfkc_cf = Normalizer2.getNFKCCasefoldInstance();
2863         // Normalization behavior for ill-formed text is not defined.
2864         // ICU currently treats ill-formed sequences as normalization-inert
2865         // and copies them unchanged.
2866         String src = "  A\uD800ÄA\u0308\uD900A\u0308\u00ad\u0323\uDBFFÄ\u0323," +
2867                 "\u00ad\uDC00\u1100\u1161가\u11A8가\u3133  \uDFFF";
2868         String expected = "  a\uD800ää\uD900ạ\u0308\uDBFFạ\u0308,\uDC00가객갃  \uDFFF";
2869         String result = nfkc_cf.normalize(src);
2870         assertEquals("normalize", expected, result);
2871     }
2872 
2873     @Test
TestComposeJamoTBase()2874     public void TestComposeJamoTBase() {
2875         // Algorithmic composition of Hangul syllables must not combine with JAMO_T_BASE = U+11A7
2876         // which is not a conjoining Jamo Trailing consonant.
2877         Normalizer2 nfkc = Normalizer2.getNFKCInstance();
2878         String s = "\u1100\u1161\u11A7\u1100\u314F\u11A7가\u11A7";
2879         String expected = "가\u11A7가\u11A7가\u11A7";
2880         String result = nfkc.normalize(s);
2881         assertEquals("normalize(LV+11A7)", expected, result);
2882         assertFalse("isNormalized(LV+11A7)", nfkc.isNormalized(s));
2883         assertTrue("isNormalized(normalized)", nfkc.isNormalized(result));
2884     }
2885 
2886     @Test
TestComposeBoundaryAfter()2887     public void TestComposeBoundaryAfter() {
2888         Normalizer2 nfkc = Normalizer2.getNFKCInstance();
2889         // U+02DA and U+FB2C do not have compose-boundaries-after.
2890         String s = "\u02DA\u0339 \uFB2C\u05B6";
2891         String expected = " \u0339\u030A \u05E9\u05B6\u05BC\u05C1";
2892         String result = nfkc.normalize(s);
2893         assertEquals("nfkc", expected, result);
2894         assertFalse("U+02DA boundary-after", nfkc.hasBoundaryAfter(0x2DA));
2895         assertFalse("U+FB2C boundary-after", nfkc.hasBoundaryAfter(0xFB2C));
2896     }
2897 
2898     @Test
TestNFC()2899     public void TestNFC() {
2900         // Coverage tests.
2901         Normalizer2 nfc = Normalizer2.getNFCInstance();
2902         assertTrue("nfc.hasBoundaryAfter(space)", nfc.hasBoundaryAfter(' '));
2903         assertFalse("nfc.hasBoundaryAfter(ä)", nfc.hasBoundaryAfter('ä'));
2904     }
2905 
2906     @Test
TestNFD()2907     public void TestNFD() {
2908         // Coverage tests.
2909         Normalizer2 nfd = Normalizer2.getNFDInstance();
2910         assertTrue("nfd.hasBoundaryAfter(space)", nfd.hasBoundaryAfter(' '));
2911         assertFalse("nfd.hasBoundaryAfter(ä)", nfd.hasBoundaryAfter('ä'));
2912     }
2913 
2914     @Test
TestFCD()2915     public void TestFCD() {
2916         // Coverage tests.
2917         Normalizer2 fcd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.FCD);
2918         assertTrue("fcd.hasBoundaryAfter(space)", fcd.hasBoundaryAfter(' '));
2919         assertFalse("fcd.hasBoundaryAfter(ä)", fcd.hasBoundaryAfter('ä'));
2920         assertTrue("fcd.isInert(space)", fcd.isInert(' '));
2921         assertFalse("fcd.isInert(ä)", fcd.isInert('ä'));
2922 
2923         // This implementation method is unreachable via public API.
2924         Norm2AllModes.FCDNormalizer2 impl = (Norm2AllModes.FCDNormalizer2)fcd;
2925         assertEquals("fcd impl.getQuickCheck(space)", 1, impl.getQuickCheck(' '));
2926         assertEquals("fcd impl.getQuickCheck(ä)", 0, impl.getQuickCheck('ä'));
2927     }
2928 
2929     @Test
TestNoneNormalizer()2930     public void TestNoneNormalizer() {
2931         // Use the deprecated Mode Normalizer.NONE for coverage of the internal NoopNormalizer2
2932         // as far as its methods are reachable that way.
2933         assertEquals("NONE.concatenate()", "ä\u0327",
2934                 Normalizer.concatenate("ä", "\u0327", Normalizer.NONE, 0));
2935         assertTrue("NONE.isNormalized()", Normalizer.isNormalized("ä\u0327", Normalizer.NONE, 0));
2936     }
2937 
2938     @Test
TestNoopNormalizer2()2939     public void TestNoopNormalizer2() {
2940         // Use the internal class directly for coverage of methods that are not publicly reachable.
2941         Normalizer2 noop = Norm2AllModes.NOOP_NORMALIZER2;
2942         assertEquals("noop.normalizeSecondAndAppend()", "ä\u0327",
2943                 noop.normalizeSecondAndAppend(new StringBuilder("ä"), "\u0327").toString());
2944         assertEquals("noop.getDecomposition()", null, noop.getDecomposition('ä'));
2945         assertTrue("noop.hasBoundaryAfter()", noop.hasBoundaryAfter(0x0308));
2946         assertTrue("noop.isInert()", noop.isInert(0x0308));
2947     }
2948 
2949     /*
2950      * Abstract class Normalizer2 has non-abstract methods which are overwritten by
2951      * its derived classes. To test these methods a derived class is defined here.
2952      */
2953     public class TestNormalizer2 extends Normalizer2 {
2954 
TestNormalizer2()2955         public TestNormalizer2() {}
2956         @Override
normalize(CharSequence src, StringBuilder dest)2957         public StringBuilder normalize(CharSequence src, StringBuilder dest) { return null; }
2958         @Override
normalize(CharSequence src, Appendable dest)2959         public Appendable normalize(CharSequence src, Appendable dest) { return null; }
2960         @Override
normalizeSecondAndAppend( StringBuilder first, CharSequence second)2961         public StringBuilder normalizeSecondAndAppend(
2962             StringBuilder first, CharSequence second) { return null; }
2963         @Override
append(StringBuilder first, CharSequence second)2964         public StringBuilder append(StringBuilder first, CharSequence second) { return null; }
2965         @Override
getDecomposition(int c)2966         public String getDecomposition(int c) { return null; }
2967         @Override
isNormalized(CharSequence s)2968         public boolean isNormalized(CharSequence s) { return false; }
2969         @Override
quickCheck(CharSequence s)2970         public Normalizer.QuickCheckResult quickCheck(CharSequence s) { return null; }
2971         @Override
spanQuickCheckYes(CharSequence s)2972         public int spanQuickCheckYes(CharSequence s) { return 0; }
2973         @Override
hasBoundaryBefore(int c)2974         public boolean hasBoundaryBefore(int c) { return false; }
2975         @Override
hasBoundaryAfter(int c)2976         public boolean hasBoundaryAfter(int c) { return false; }
2977         @Override
isInert(int c)2978         public boolean isInert(int c) { return false; }
2979     }
2980 
2981     final TestNormalizer2 tnorm2 = new TestNormalizer2();
2982     @Test
TestGetRawDecompositionBase()2983     public void TestGetRawDecompositionBase() {
2984         int c = 'à';
2985         assertEquals("Unexpected value returned from Normalizer2.getRawDecomposition()",
2986                      null, tnorm2.getRawDecomposition(c));
2987     }
2988 
2989     @Test
TestComposePairBase()2990     public void TestComposePairBase() {
2991         int a = 'a';
2992         int b = '\u0300';
2993         assertEquals("Unexpected value returned from Normalizer2.composePair()",
2994                      -1, tnorm2.composePair(a, b));
2995     }
2996 
2997     @Test
TestGetCombiningClassBase()2998     public void TestGetCombiningClassBase() {
2999         int c = '\u00e0';
3000         assertEquals("Unexpected value returned from Normalizer2.getCombiningClass()",
3001                      0, tnorm2.getCombiningClass(c));
3002     }
3003 }
3004