• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.IOException;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Enumeration;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Locale;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import org.unicode.cldr.draft.FileUtilities;
21 import org.unicode.cldr.util.CLDRConfig;
22 import org.unicode.cldr.util.CLDRFile;
23 import org.unicode.cldr.util.CLDRPaths;
24 import org.unicode.cldr.util.CLDRTransforms;
25 import org.unicode.cldr.util.Factory;
26 import org.unicode.cldr.util.Pair;
27 import org.unicode.cldr.util.PathUtilities;
28 import org.unicode.cldr.util.XMLFileReader;
29 import org.unicode.cldr.util.XPathParts;
30 
31 import com.google.common.base.Joiner;
32 import com.google.common.collect.ImmutableSet;
33 import com.ibm.icu.impl.Utility;
34 import com.ibm.icu.lang.UCharacter;
35 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
36 import com.ibm.icu.text.Normalizer2;
37 import com.ibm.icu.text.Transliterator;
38 import com.ibm.icu.text.UnicodeSet;
39 import com.ibm.icu.util.ULocale;
40 
41 public class TestTransforms extends TestFmwkPlus {
42     CLDRConfig testInfo = CLDRConfig.getInstance();
43 
main(String[] args)44     public static void main(String[] args) {
45         new TestTransforms().run(args);
46     }
47 
TestUzbek()48     public void TestUzbek() {
49         register();
50         Transliterator cyrillicToLatin = Transliterator
51             .getInstance("uz_Cyrl-uz_Latn");
52         Transliterator latinToCyrillic = cyrillicToLatin.getInverse();
53         // for (Transliterator t2 : t.getElements()) {
54         // System.out.println(t2.getSourceSet().toPattern(false) + " => " +
55         // t2.getTargetSet().toPattern(false));
56         // }
57         String cyrillic = "аА бБ вВ гГ ғҒ   дД ЕеЕ    ЁёЁ    жЖ зЗ иИ йЙ кК қҚ лЛ мМ нН оО пП рР сС тТ уУ ўЎ   фФ хХ ҳҲ ЦцЦ    ЧчЧ    ШшШ    бъ Ъ эЭ ЮюЮ    ЯяЯ";
58         String latin = "aA bB vV gG gʻGʻ dD YeyeYE YoyoYO jJ zZ iI yY kK qQ lL mM nN oO pP rR sS tT uU oʻOʻ fF xX hH TstsTS ChchCH ShshSH bʼ ʼ eE YuyuYU YayaYA";
59         UnicodeSet vowelsAndSigns = new UnicodeSet(
60             "[аА еЕёЁ иИ оО уУўЎ эЭ юЮ яЯ ьЬ ъЪ]").freeze();
61         UnicodeSet consonants = new UnicodeSet().addAll(cyrillic)
62             .removeAll(vowelsAndSigns).remove(" ").freeze();
63 
64         // UnicodeSet englishVowels = new UnicodeSet();
65         // for (String s : vowelsAndSigns) {
66         // String result = cyrillicToLatin.transform(s);
67         // if (!result.isEmpty()) {
68         // englishVowels.add(result);
69         // }
70         // }
71         // System.out.println(englishVowels.toPattern(false));
72 
73         String[] cyrillicSplit = cyrillic.split("\\s+");
74         String[] latinSplit = latin.split("\\s+");
75         for (int i = 0; i < cyrillicSplit.length; ++i) {
76             assertTransformsTo("Uzbek to Latin", latinSplit[i],
77                 cyrillicToLatin, cyrillicSplit[i]);
78             assertTransformsTo("Uzbek to Cyrillic", cyrillicSplit[i],
79                 latinToCyrillic, latinSplit[i]);
80         }
81 
82         // # е → 'ye' at the beginning of a syllable, after a vowel, ъ or ь,
83         // otherwise 'e'
84 
85         assertEquals("Uzbek to Latin", "Belgiya",
86             cyrillicToLatin.transform("Бельгия"));
87         UnicodeSet lower = new UnicodeSet("[:lowercase:]");
88         for (String e : new UnicodeSet("[еЕ]")) {
89             String ysuffix = lower.containsAll(e) ? "ye" : "YE";
90             String suffix = lower.containsAll(e) ? "e" : "E";
91             for (String s : vowelsAndSigns) {
92                 String expected = getPrefix(cyrillicToLatin, s, ysuffix);
93                 assertTransformsTo("Uzbek to Latin ye", expected,
94                     cyrillicToLatin, s + e);
95             }
96             for (String s : consonants) {
97                 String expected = getPrefix(cyrillicToLatin, s, suffix);
98                 assertTransformsTo("Uzbek to Latin e", expected,
99                     cyrillicToLatin, s + e);
100             }
101             for (String s : Arrays.asList(" ", "")) { // start of string,
102                 // non-letter
103                 String expected = getPrefix(cyrillicToLatin, s, ysuffix);
104                 assertTransformsTo("Uzbek to Latin ye", expected,
105                     cyrillicToLatin, s + e);
106             }
107         }
108 
109         if (isVerbose()) {
110             // Now check for correspondences
111             Factory factory = testInfo.getCldrFactory();
112             CLDRFile uzLatn = factory.make("uz_Latn", false);
113             CLDRFile uzCyrl = factory.make("uz", false);
114 
115             Set<String> latinFromCyrillicSucceeds = new TreeSet<>();
116             Set<String> latinFromCyrillicFails = new TreeSet<>();
117             for (String path : uzCyrl) {
118                 String latnValue = uzLatn.getStringValue(path);
119                 if (latnValue == null) {
120                     continue;
121                 }
122                 String cyrlValue = uzCyrl.getStringValue(path);
123                 if (cyrlValue == null) {
124                     continue;
125                 }
126                 String latnFromCyrl = cyrillicToLatin.transform(latnValue);
127                 if (latnValue.equals(latnFromCyrl)) {
128                     latinFromCyrillicSucceeds.add(latnValue + "\t←\t"
129                         + cyrlValue);
130                 } else {
131                     latinFromCyrillicFails.add(latnValue + "\t≠\t"
132                         + latnFromCyrl + "\t←\t" + cyrlValue);
133                 }
134             }
135             logln("Success! " + latinFromCyrillicSucceeds.size() + "\n"
136                 + Joiner.on("\n").join(latinFromCyrillicSucceeds));
137             logln("\nFAILS!" + latinFromCyrillicFails.size() + "\n"
138                 + Joiner.on("\n").join(latinFromCyrillicFails));
139         }
140     }
141 
getPrefix(Transliterator cyrillicToLatin, String prefixSource, String suffix)142     private String getPrefix(Transliterator cyrillicToLatin,
143         String prefixSource, String suffix) {
144         String result = cyrillicToLatin.transform(prefixSource);
145         if (!result.isEmpty()
146             && UCharacter.getType(suffix.codePointAt(0)) != ECharacterCategory.UPPERCASE_LETTER
147             && UCharacter.getType(result.codePointAt(0)) == ECharacterCategory.UPPERCASE_LETTER) {
148             result = UCharacter.toTitleCase(result, null);
149         }
150         return result + suffix;
151     }
152 
TestBackslashHalfwidth()153     public void TestBackslashHalfwidth() throws Exception {
154         register();
155         // CLDRTransforms.registerCldrTransforms(null,
156         // "(?i)(Fullwidth-Halfwidth|Halfwidth-Fullwidth)", isVerbose() ?
157         // getLogPrintWriter() : null);
158         // Transliterator.DEBUG = true;
159 
160         String input = "\"; // FF3C
161         String expected = "\\"; // 005C
162         Transliterator t = Transliterator.getInstance("Fullwidth-Halfwidth");
163         String output = t.transliterate(input);
164         assertEquals("To Halfwidth", expected, output);
165 
166         input = "\\"; // FF3C
167         expected = "\"; // 005C
168         Transliterator t2 = t.getInverse();
169         output = t2.transliterate(input);
170         assertEquals("To FullWidth", expected, output);
171     }
172 
TestASimple()173     public void TestASimple() {
174         Transliterator foo = Transliterator.getInstance("cs-cs_FONIPA");
175     }
176 
177     boolean registered = false;
178 
register()179     void register() {
180         if (!registered) {
181             CLDRTransforms.registerCldrTransforms(null, null,
182                 isVerbose() ? getLogPrintWriter() : null, true);
183             registered = true;
184         }
185     }
186 
/**
 * Directive names recognised in the {@code Test1461} data table. The
 * constants are lowercase because they are looked up with
 * {@code valueOf} on the lower-cased directive text.
 */
enum Options {
    /** The next column names the transliterator to use for the following rows. */
    transliterator,
    /** The next column switches the inverse (round-trip) check on or off. */
    roundtrip
}
190 
makeLegacyTransformID(String source, String target, String variant)191     private String makeLegacyTransformID(String source, String target, String variant) {
192         if (variant != null) {
193             return source + "-" + target + "/" + variant;
194         } else {
195             return source + "-" + target;
196         }
197     }
198 
checkTransformID(String id, File file)199     private void checkTransformID(String id, File file) {
200         if (id.indexOf("-t-") > 0) {
201             String expected = ULocale.forLanguageTag(id).toLanguageTag();
202             if (!id.equals(expected)) {
203                 errln(file.getName() + ": BCP47-T identifier \"" +
204                     id + "\" should be \"" + expected + "\"");
205             }
206         }
207     }
208 
addTransformID(String id, File file, Map<String, File> ids)209     private void addTransformID(String id, File file, Map<String, File> ids) {
210         File oldFile = ids.get(id);
211         if (oldFile == null || oldFile.equals(file)) {
212             ids.put(id, file);
213         } else {
214             errln(file.getName() + ": Transform \"" + id +
215                 "\" already defined in " + oldFile.getName());
216         }
217     }
218 
addTransformIDs(File file, XPathParts parts, int element, Map<String, File> ids)219     private void addTransformIDs(File file, XPathParts parts, int element, Map<String, File> ids) {
220         String source = parts.getAttributeValue(element, "source");
221         String target = parts.getAttributeValue(element, "target");
222         String variant = parts.getAttributeValue(element, "variant");
223         String direction = parts.getAttributeValue(element, "direction");
224 
225         if (source != null && target != null) {
226             if ("forward".equals(direction)) {
227                 addTransformID(makeLegacyTransformID(source, target, variant), file, ids);
228             } else if ("both".equals(direction)) {
229                 addTransformID(makeLegacyTransformID(source, target, variant), file, ids);
230                 addTransformID(makeLegacyTransformID(target, source, variant), file, ids);
231             }
232         }
233 
234         String alias = parts.getAttributeValue(element, "alias");
235         if (alias != null) {
236             for (String id : alias.split("\\s+")) {
237                 addTransformID(id, file, ids);
238             }
239         }
240 
241         String backwardAlias = parts.getAttributeValue(element, "backwardAlias");
242         if (backwardAlias != null) {
243             if (!"both".equals(direction)) {
244                 errln(file.getName() + ": Expected direction=\"both\" " +
245                     "when backwardAlias is present");
246             }
247 
248             for (String id : backwardAlias.split("\\s+")) {
249                 addTransformID(id, file, ids);
250             }
251         }
252     }
253 
getTransformIDs(String transformsDirectoryPath)254     private Map<String, File> getTransformIDs(String transformsDirectoryPath) {
255         Map<String, File> ids = new HashMap<>();
256         File dir = new File(transformsDirectoryPath);
257         if (!dir.exists()) {
258             errln("Cannot find transforms directory at " + transformsDirectoryPath);
259             return ids;
260         }
261 
262         for (File file : dir.listFiles()) {
263             if (!file.getName().endsWith(".xml")) {
264                 continue;
265             }
266             List<Pair<String, String>> data = new ArrayList<>();
267             XMLFileReader.loadPathValues(file.getPath(), data, true);
268             for (Pair<String, String> entry : data) {
269                 final String xpath = entry.getFirst();
270                 if (xpath.startsWith("//supplementalData/transforms/transform[")) {
271                     String fileName = file.getName();
272                     XPathParts parts = XPathParts.getFrozenInstance(xpath);
273                     addTransformIDs(file, parts, 2, ids);
274                 }
275             }
276         }
277         return ids;
278     }
279 
280     final ImmutableSet<String> OK_MISSING_FROM_OLD = ImmutableSet.of("und-Sarb-t-und-ethi",
281         "Ethi-Sarb", "und-Ethi-t-und-latn", "Musnad-Ethiopic", "und-Ethi-t-und-sarb",
282         "Sarb-Ethi", "Ethiopic-Musnad");
283 
TestTransformIDs()284     public void TestTransformIDs() {
285         Map<String, File> transforms = getTransformIDs(CLDRPaths.TRANSFORMS_DIRECTORY);
286         for (Map.Entry<String, File> entry : transforms.entrySet()) {
287             checkTransformID(entry.getKey(), entry.getValue());
288         }
289 
290         // Only run the rest in exhaustive mode since it requires CLDR_ARCHIVE_DIRECTORY.
291         if (getInclusion() <= 5) {
292             return;
293         }
294 
295         Set<String> removedTransforms = new HashSet<>();
296         removedTransforms.add("ASCII-Latin"); // http://unicode.org/cldr/trac/ticket/9163
297 
298         Map<String, File> oldTransforms = getTransformIDs(CLDRPaths.LAST_TRANSFORMS_DIRECTORY);
299         for (Map.Entry<String, File> entry : oldTransforms.entrySet()) {
300             String id = entry.getKey();
301             if (!transforms.containsKey(id)
302                 && !removedTransforms.contains(id)
303                 && !OK_MISSING_FROM_OLD.contains(id)) {
304                 File oldFile = entry.getValue();
305                 errln("Missing transform \"" + id +
306                     "\"; the previous CLDR release had defined it in " + oldFile.getName());
307             }
308         }
309     }
310 
Test1461()311     public void Test1461() {
312         register();
313 
314         String[][] tests = {
315             { "transliterator=", "Katakana-Latin" },
316             { "\u30CF \u30CF\uFF70 \u30CF\uFF9E \u30CF\uFF9F",
317             "ha hā ba pa" },
318             { "transliterator=", "Hangul-Latin" },
319             { "roundtrip=", "true" }, { "갗", "gach" }, { "느", "neu" }, };
320 
321         Transliterator transform = null;
322         Transliterator inverse = null;
323         String id = null;
324         boolean roundtrip = false;
325         for (String[] items : tests) {
326             String source = items[0];
327             String target = items[1];
328             if (source.endsWith("=")) {
329                 switch (Options.valueOf(source
330                     .substring(0, source.length() - 1).toLowerCase(
331                         Locale.ENGLISH))) {
332                         case transliterator:
333                             id = target;
334                             transform = Transliterator.getInstance(id);
335                             inverse = Transliterator.getInstance(id,
336                                 Transliterator.REVERSE);
337                             break;
338                         case roundtrip:
339                             roundtrip = target.toLowerCase(Locale.ENGLISH).charAt(0) == 't';
340                             break;
341                 }
342                 continue;
343             }
344             String result = transform.transliterate(source);
345             assertEquals(id + ":from " + source, target, result);
346             if (roundtrip) {
347                 String result2 = inverse.transliterate(target);
348                 assertEquals(id + " (inv): from " + target, source, result2);
349             }
350         }
351     }
352 
Test8921()353     public void Test8921() {
354         register();
355         Transliterator trans = Transliterator.getInstance("Latin-ASCII");
356         assertEquals("Test8921", "Kornil'ev Kirill",
357             trans.transliterate("Kornilʹev Kirill"));
358     }
359 
360     private Pattern rfc6497Pattern = Pattern.compile("([a-zA-Z0-9-]+)-t-([a-zA-Z0-9-]+?)(?:-m0-([a-zA-Z0-9-]+))?");
361 
362     // cs-fonipa --> cs_fonipa; und-deva --> deva
363     // TODO: Remove this workaround once ICU supports BCP47-T identifiers.
364     // http://bugs.icu-project.org/trac/ticket/12599
getLegacyCode(String code)365     private String getLegacyCode(String code) {
366         code = code.replace('-', '_');
367         if (code.startsWith("und_") && code.length() == 8) {
368             code = code.substring(4);
369         }
370         return code;
371     }
372 
getTransliterator(String id)373     private Transliterator getTransliterator(String id) {
374         return Transliterator.getInstance(getOldTranslitId(id));
375     }
376 
getOldTranslitId(String id)377     private String getOldTranslitId(String id) {
378         // TODO: Pass unmodified transform name to ICU, once
379         // ICU can handle transform identifiers according to
380         // BCP47 Extension T (RFC 6497). The rewriting below
381         // is just a temporary workaround, allowing us to use
382         // BCP47-T identifiers for naming test data files.
383         // http://bugs.icu-project.org/trac/ticket/12599
384         if (id.equalsIgnoreCase("und-t-d0-publish")) {
385             return ("Any-Publishing");
386         } else if (id.equalsIgnoreCase("und-t-s0-publish")) {
387             return ("Publishing-Any");
388         } else if (id.equalsIgnoreCase("de-t-de-d0-ascii")) {
389             return ("de-ASCII");
390         } else if (id.equalsIgnoreCase("my-t-my-s0-zawgyi")) {
391             return ("Zawgyi-my");
392         } else if (id.equalsIgnoreCase("my-t-my-d0-zawgyi")) {
393             return "my-Zawgyi";
394         } else if (id.equalsIgnoreCase("und-t-d0-ascii")) {
395             return ("Latin-ASCII");
396         }
397 
398         Matcher rfc6497Matcher = rfc6497Pattern.matcher(id);
399         if (rfc6497Matcher.matches()) {
400             String targetLanguage = getLegacyCode(rfc6497Matcher.group(1));
401             String originalLanguage = getLegacyCode(rfc6497Matcher.group(2));
402             String mechanism = rfc6497Matcher.group(3);
403             id = originalLanguage + "-" + targetLanguage;
404             if (mechanism != null && !mechanism.isEmpty()) {
405                 id += "/" + mechanism.replace('-', '_');
406             }
407         }
408         return id;
409     }
410 
TestData()411     public void TestData() {
412         register();
413         try {
414             // get the folder name
415             String name = TestTransforms.class.getResource(".").toString();
416             if (!name.startsWith("file:")) {
417                 throw new IllegalArgumentException("Internal Error");
418             }
419             name = name.substring(5);
420             File fileDirectory = new File(CLDRPaths.TEST_DATA + "transforms/");
421             String fileDirectoryName = PathUtilities.getNormalizedPathString(fileDirectory);
422             assertTrue(fileDirectoryName, fileDirectory.exists());
423 
424             logln("Testing files in: " + fileDirectoryName);
425 
426             Set<String> foundTranslitsLower = new TreeSet();
427 
428             for (String file : fileDirectory.list()) {
429                 if (!file.endsWith(".txt") || file.startsWith("_readme")) {
430                     continue;
431                 }
432                 logln("Testing file: " + file);
433                 String transName = file.substring(0, file.length() - 4);
434                 if (transName.equals("ka-Latn-t-ka-m0-bgn")) {
435                     logKnownIssue("cldrbug:10566", "Jenkins build failing on translit problem");
436                     continue; // failures like the following need to be fixed first.
437                     // Error: (TestTransforms.java:434) : ka-Latn-t-ka-m0-bgn 2 Transform უფლება: expected "up’leba", got "upleba"
438                 }
439 
440                 Transliterator trans = getTransliterator(transName);
441                 String id = trans.getID().toLowerCase(Locale.ROOT);
442                 foundTranslitsLower.add(id);
443 
444                 BufferedReader in = FileUtilities.openUTF8Reader(fileDirectoryName, file);
445                 int counter = 0;
446                 while (true) {
447                     String line = in.readLine();
448                     if (line == null)
449                         break;
450                     line = line.trim();
451                     counter += 1;
452                     if (line.startsWith("#")) {
453                         continue;
454                     }
455                     String[] parts = line.split("\t");
456                     String source = parts[0];
457                     String expected = parts[1];
458                     String result = trans.transform(source);
459                     assertEquals(transName + " " + counter + " Transform "
460                         + source, expected, result);
461                 }
462                 in.close();
463             }
464             Set<String> allTranslitsLower = oldEnumConvertLower(Transliterator.getAvailableIDs(), new TreeSet<>());
465             // see which are missing tests
466             for (String s : allTranslitsLower) {
467                 if (!foundTranslitsLower.contains(s)) {
468                     warnln("Translit with no test file:\t" + s);
469                 }
470             }
471 
472             // all must be superset of found tests
473             for (String s : foundTranslitsLower) {
474                 if (!allTranslitsLower.contains(s)) {
475                     warnln("Test file with no translit:\t" + s);
476                 }
477             }
478 
479         } catch (IOException e) {
480             throw new IllegalArgumentException(e);
481         }
482     }
483 
oldEnumConvert(Enumeration<T> source, U target)484     private <T, U extends Collection<T>> U oldEnumConvert(Enumeration<T> source, U target) {
485         while (source.hasMoreElements()) {
486             target.add(source.nextElement());
487         }
488         return target;
489     }
490 
oldEnumConvertLower(Enumeration<String> source, U target)491     private <U extends Collection<String>> U oldEnumConvertLower(Enumeration<String> source, U target) {
492         while (source.hasMoreElements()) {
493             target.add(source.nextElement().toLowerCase(Locale.ROOT));
494         }
495         return target;
496     }
497 
498 
/**
 * Case mappings exercised by {@code TestCasing}. The constant name is
 * appended to a locale to form the transliterator identifier, e.g.
 * {@code el-Upper}.
 */
enum Casing {
    Upper,
    Title,
    Lower
}
502 
TestCasing()503     public void TestCasing() {
504         register();
505         String greekSource = "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ";
506         // Transliterator.DEBUG = true;
507         Transliterator elTitle = checkString("el", Casing.Title,
508             "Οδός Οδός Σο Σο Oς Ος Σ Ἕξ", greekSource, true);
509         Transliterator elLower = checkString("el", Casing.Lower,
510             "οδός οδός σο σο oς ος σ ἕξ", greekSource, true);
511         Transliterator elUpper = checkString("el", Casing.Upper,
512             "ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", greekSource, true); // now true due to ICU #5456
513 
514         String turkishSource = "Isiİ İsıI";
515         Transliterator trTitle = checkString("tr", Casing.Title, "Isii İsıı",
516             turkishSource, true);
517         Transliterator trLower = checkString("tr", Casing.Lower, "ısii isıı",
518             turkishSource, true);
519         Transliterator trUpper = checkString("tr", Casing.Upper, "ISİİ İSII",
520             turkishSource, true);
521         Transliterator azTitle = checkString("az", Casing.Title, "Isii İsıı",
522             turkishSource, true);
523         Transliterator azLower = checkString("az", Casing.Lower, "ısii isıı",
524             turkishSource, true);
525         Transliterator azUpper = checkString("az", Casing.Upper, "ISİİ İSII",
526             turkishSource, true);
527 
528         String lithuanianSource = "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 xi\u0307\u0308 xj\u0307\u0308 x\u012F\u0307\u0308 xi\u0307\u0300 xi\u0307\u0301 xi\u0307\u0303 XI X\u00CF XJ XJ\u0308 X\u012E X\u012E\u0308";
529         // The following test was formerly skipped with
530         // !logKnownIssue("11094", "Fix ICU4J UCharacter.toTitleCase/toLowerCase for lt").
531         // However [https://unicode-org.atlassian.net/browse/ICU-11094] is supposedly
532         // fixed in the version of ICU4J currently in CLDR, but removing the logKnownIssue
533         // to execute the test results in test failures, mainly for  i\u0307\u0308.
534         // So I am changing the logKnownIssue to reference a CLDR ticket about
535         // investigating the test (it may be wrong).
536         if (!logKnownIssue("cldrbug:13313",
537             "Investigate the Lithuanian casing test, it may be wrong")) {
538             Transliterator ltTitle = checkString(
539                 "lt",
540                 Casing.Title,
541                 "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 Xi\u0307\u0308 Xj\u0307\u0308 X\u012F\u0307\u0308 Xi\u0307\u0300 Xi\u0307\u0301 Xi\u0307\u0303 Xi Xi\u0307\u0308 Xj Xj\u0307\u0308 X\u012F X\u012F\u0307\u0308",
542                 lithuanianSource, true);
543             Transliterator ltLower = checkString(
544                 "lt",
545                 Casing.Lower,
546                 "i i\u0307\u0308 j j\u0307\u0308 \u012F \u012F\u0307\u0308 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 xi\u0307\u0308 xj\u0307\u0308 x\u012F\u0307\u0308 xi\u0307\u0300 xi\u0307\u0301 xi\u0307\u0303 xi xi\u0307\u0308 xj xj\u0307\u0308 x\u012F x\u012F\u0307\u0308",
547                 lithuanianSource, true);
548         }
549         Transliterator ltUpper = checkString(
550             "lt",
551             Casing.Upper,
552             "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 X\u00CF XJ\u0308 X\u012E\u0308 X\u00CC X\u00CD X\u0128 XI X\u00CF XJ XJ\u0308 X\u012E X\u012E\u0308",
553             lithuanianSource, true);
554 
555         String dutchSource = "IJKIJ ijkij IjkIj";
556         Transliterator nlTitle = checkString("nl", Casing.Title,
557             "IJkij IJkij IJkij", dutchSource, true);
558         // Transliterator nlLower = checkString("nl", Casing.Lower, "ısii isıı",
559         // turkishSource);
560         // Transliterator nlUpper = checkString("tr", Casing.Upper, "ISİİ İSII",
561         // turkishSource);
562     }
563 
checkString(String locale, Casing casing, String expected, String source, boolean sameAsSpecialCasing)564     private Transliterator checkString(String locale, Casing casing,
565         String expected, String source, boolean sameAsSpecialCasing) {
566         Transliterator translit = Transliterator.getInstance(locale + "-"
567             + casing);
568         String result = checkString(locale, expected, source, translit);
569         ULocale ulocale = new ULocale(locale);
570         String specialCasing;
571         Normalizer2 normNFC = Normalizer2.getNFCInstance(); // UCharacter.toXxxCase
572         // doesn't
573         // normalize,
574         // Transliterator
575         // does
576         switch (casing) {
577         case Upper:
578             specialCasing = normNFC.normalize(UCharacter.toUpperCase(ulocale,
579                 source));
580             break;
581         case Title:
582             specialCasing = normNFC.normalize(UCharacter.toTitleCase(ulocale,
583                 source, null));
584             break;
585         case Lower:
586             specialCasing = normNFC.normalize(UCharacter.toLowerCase(ulocale,
587                 source));
588             break;
589         default:
590             throw new IllegalArgumentException();
591         }
592         if (sameAsSpecialCasing) {
593             if (!assertEquals(locale + "-" + casing + " Vs SpecialCasing",
594                 specialCasing, result)) {
595                 showFirstDifference("Special: ", specialCasing, "Transform: ",
596                     result);
597             }
598         } else {
599             assertNotEquals(locale + "-" + casing + "Vs SpecialCasing",
600                 specialCasing, result);
601         }
602         return translit;
603     }
604 
checkString(String locale, String expected, String source, Transliterator translit)605     private String checkString(String locale, String expected, String source,
606         Transliterator translit) {
607         String transformed = translit.transform(source);
608         if (!assertEquals(locale, expected, transformed)) {
609             showTransliterator(translit);
610         }
611         return transformed;
612     }
613 
showFirstDifference(String titleA, String a, String titleB, String b)614     private void showFirstDifference(String titleA, String a, String titleB,
615         String b) {
616         StringBuilder buffer = new StringBuilder();
617         for (int i = 0; i < Math.min(a.length(), b.length()); ++i) {
618             char aChar = a.charAt(i);
619             char bChar = b.charAt(i);
620             if (aChar == bChar) {
621                 buffer.append(aChar);
622             } else {
623                 errln("\t" + buffer + "\n\t\t" + titleA + "\t"
624                     + Utility.hex(a.substring(i)) + "\n\t\t" + titleB
625                     + "\t" + Utility.hex(b.substring(i)));
626                 return;
627             }
628         }
629         errln("different length");
630     }
631 
showTransliterator(Transliterator t)632     private void showTransliterator(Transliterator t) {
633         org.unicode.cldr.test.TestTransforms.showTransliterator("", t, 999);
634     }
635 
Test9925()636     public void Test9925() {
637         register();
638         Transliterator pinyin = getTransliterator("und-Latn-t-und-hani");
639         assertEquals("賈 bug", "jiǎ", pinyin.transform("賈"));
640     }
641 
TestHiraKata()642     public void TestHiraKata() { // for CLDR-13127 and ...
643         register();
644         Transliterator hiraKata = getTransliterator("Hiragana-Katakana");
645         assertEquals("Hira-Kata", hiraKata.transform("゛゜ わ゙ ゟ"), "゛゜ ヷ ヨリ");
646     }
647 
TestZawgyiToUnicode10899()648   public void TestZawgyiToUnicode10899() {
649     // Some tests for the transformation of Zawgyi font encoding to Unicode Burmese.
650     Transliterator z2u = getTransliterator("my-t-my-s0-zawgyi");
651 
652     String z1 =
653         "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u1039\u1031\u1010\u103C";
654     String expected =
655         "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u103A\u1010\u103D\u1031";
656 
657     String actual = z2u.transform(z1);
658 
659     assertEquals("z1 to u1", expected, actual);
660 
661     String z2 = "တကယ္ဆို အျငိႈးေတြမဲ႔ေသာလမ္းေသာလမ္းမွာ တိုႈျပန္ဆံုျကတဲ႔အခါ ";
662     expected = "တကယ်ဆို အငြှိုးတွေမဲ့သောလမ်းသောလမ်းမှာ တှိုပြန်ဆုံကြတဲ့အခါ ";
663     actual = z2u.transform(z2);
664     assertEquals("z2 to u2", expected, actual);
665 
666     String z3 = "ျပန္လမ္းမဲ့ကၽြန္းအပိုင္း၄";
667     expected = "ပြန်လမ်းမဲ့ကျွန်းအပိုင်း၎";
668     actual = z2u.transform(z3);
669     assertEquals("z3 to u3", expected, actual);
670   }
671 
TestUnicodeToZawgyi111107()672   public void TestUnicodeToZawgyi111107() {
673     // Some tests for the transformation from Unicode to Zawgyi font encoding
674     Transliterator u2z = getTransliterator("my-t-my-d0-zawgyi");
675 
676     String expected =
677         "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u1039\u1031\u1010\u103C";
678     String u1 =
679         "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u103A\u1010\u103D\u1031";
680 
681     String actual = u2z.transform(u1);
682 
683     assertEquals("u1 to z1", expected, actual);
684 
685     expected = "တကယ္ဆို အၿငႇိဳးေတြမဲ့ေသာလမ္းေသာလမ္းမွာ တိႈျပန္ဆံုၾကတဲ့အခါ ";
686     String u2 = "တကယ်ဆို အငြှိုးတွေမဲ့သောလမ်းသောလမ်းမှာ တှိုပြန်ဆုံကြတဲ့အခါ ";
687     actual = u2z.transform(u2);
688     assertEquals("u2 to z2", expected, actual);
689 
690     expected = "ျပန္လမ္းမဲ့ကြၽန္းအပိုင္း၄";
691     String u3 = "ပြန်လမ်းမဲ့ကျွန်းအပိုင်း၎";
692     actual = u2z.transform(u3);
693     assertEquals("u3 to z3", expected, actual);
694   }
695 }
696