package org.unicode.cldr.unittest;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTransforms;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;

/**
 * Unit tests for the CLDR transliteration rules ("transforms").
 *
 * <p>Most tests first call {@link #register()} so that the CLDR rule files are
 * registered with ICU's {@link Transliterator} before an instance is requested.
 */
public class TestTransforms extends TestFmwkPlus {
    CLDRConfig testInfo = CLDRConfig.getInstance();

    /** Command-line entry point: runs this test class with the given arguments. */
    public static void main(String[] args) {
        new TestTransforms().run(args);
    }

    /**
     * Checks the uz_Cyrl-uz_Latn transform (and its inverse) letter by letter, then the
     * context-sensitive rendering of Cyrillic "е". In verbose mode it additionally compares
     * the transform output with the values stored in the uz_Latn locale data.
     */
    public void TestUzbek() {
        register();
        Transliterator cyrillicToLatin = Transliterator.getInstance("uz_Cyrl-uz_Latn");
        Transliterator latinToCyrillic = cyrillicToLatin.getInverse();

        // Parallel, whitespace-separated samples: token i of one maps to token i of the other.
        String cyrillic = "аА бБ вВ гГ ғҒ дД ЕеЕ ЁёЁ жЖ зЗ иИ йЙ кК қҚ лЛ мМ нН оО пП рР сС тТ уУ ўЎ фФ хХ ҳҲ ЦцЦ ЧчЧ ШшШ бъ Ъ эЭ ЮюЮ ЯяЯ";
        String latin = "aA bB vV gG gʻGʻ dD YeyeYE YoyoYO jJ zZ iI yY kK qQ lL mM nN oO pP rR sS tT uU oʻOʻ fF xX hH TstsTS ChchCH ShshSH bʼ ʼ eE YuyuYU YayaYA";
        UnicodeSet vowelsAndSigns = new UnicodeSet(
            "[аА еЕёЁ иИ оО уУўЎ эЭ юЮ яЯ ьЬ ъЪ]").freeze();
        UnicodeSet consonants = new UnicodeSet().addAll(cyrillic)
            .removeAll(vowelsAndSigns).remove(" ").freeze();

        String[] cyrillicSplit = cyrillic.split("\\s+");
        String[] latinSplit = latin.split("\\s+");
        for (int i = 0; i < cyrillicSplit.length; ++i) {
            assertTransformsTo("Uzbek to Latin", latinSplit[i],
                cyrillicToLatin, cyrillicSplit[i]);
            assertTransformsTo("Uzbek to Cyrillic", cyrillicSplit[i],
                latinToCyrillic, latinSplit[i]);
        }

        // # е → 'ye' at the beginning of a syllable, after a vowel, ъ or ь,
        // otherwise 'e'
        assertEquals("Uzbek to Latin", "Belgiya",
            cyrillicToLatin.transform("Бельгия"));
        UnicodeSet lower = new UnicodeSet("[:lowercase:]");
        for (String e : new UnicodeSet("[еЕ]")) {
            boolean isLower = lower.containsAll(e);
            String ysuffix = isLower ? "ye" : "YE";
            String suffix = isLower ? "e" : "E";
            for (String s : vowelsAndSigns) {
                String expected = getPrefix(cyrillicToLatin, s, ysuffix);
                assertTransformsTo("Uzbek to Latin ye", expected,
                    cyrillicToLatin, s + e);
            }
            for (String s : consonants) {
                String expected = getPrefix(cyrillicToLatin, s, suffix);
                assertTransformsTo("Uzbek to Latin e", expected,
                    cyrillicToLatin, s + e);
            }
            // start of string, non-letter
            for (String s : Arrays.asList(" ", "")) {
                String expected = getPrefix(cyrillicToLatin, s, ysuffix);
                assertTransformsTo("Uzbek to Latin ye", expected,
                    cyrillicToLatin, s + e);
            }
        }

        if (isVerbose()) {
            // Now check for correspondences between the two locales' stored values.
            Factory factory = testInfo.getCldrFactory();
            CLDRFile uzLatn = factory.make("uz_Latn", false);
            CLDRFile uzCyrl = factory.make("uz", false);

            Set<String> latinFromCyrillicSucceeds = new TreeSet<>();
            Set<String> latinFromCyrillicFails = new TreeSet<>();
            for (String path : uzCyrl) {
                String latnValue = uzLatn.getStringValue(path);
                if (latnValue == null) {
                    continue;
                }
                String cyrlValue = uzCyrl.getStringValue(path);
                if (cyrlValue == null) {
                    continue;
                }
                // Transform the CYRILLIC value; the Latin value is what we compare against.
                String latnFromCyrl = cyrillicToLatin.transform(cyrlValue);
                if (latnValue.equals(latnFromCyrl)) {
                    latinFromCyrillicSucceeds.add(latnValue + "\t←\t"
                        + cyrlValue);
                } else {
                    latinFromCyrillicFails.add(latnValue + "\t≠\t"
                        + latnFromCyrl + "\t←\t" + cyrlValue);
                }
            }
            logln("Success! " + latinFromCyrillicSucceeds.size() + "\n"
                + Joiner.on("\n").join(latinFromCyrillicSucceeds));
            logln("\nFAILS! " + latinFromCyrillicFails.size() + "\n"
                + Joiner.on("\n").join(latinFromCyrillicFails));
        }
    }

    /**
     * Builds the expected Latin output for {@code prefixSource} followed by an "е".
     *
     * @param cyrillicToLatin transform used to convert the prefix
     * @param prefixSource Cyrillic text preceding the letter under test (may be empty)
     * @param suffix expected Latin rendering of the letter under test; must be non-empty
     * @return the transformed prefix (title-cased when it starts with an uppercase letter
     *     and the suffix does not) followed by {@code suffix}
     */
    private String getPrefix(Transliterator cyrillicToLatin,
        String prefixSource, String suffix) {
        String result = cyrillicToLatin.transform(prefixSource);
        // An uppercase prefix followed by a lowercase letter is expected in title case,
        // e.g. a multi-letter uppercase rendering is reduced to its title-case form.
        if (!result.isEmpty()
            && UCharacter.getType(suffix.codePointAt(0)) != ECharacterCategory.UPPERCASE_LETTER
            && UCharacter.getType(result.codePointAt(0)) == ECharacterCategory.UPPERCASE_LETTER) {
            result = UCharacter.toTitleCase(result, null);
        }
        return result + suffix;
    }

    /**
     * Checks that Fullwidth-Halfwidth maps U+FF3C FULLWIDTH REVERSE SOLIDUS to U+005C
     * and that the inverse transform maps it back.
     */
    public void TestBackslashHalfwidth() throws Exception {
        register();

        String input = "\uFF3C"; // FF3C
        String expected = "\\"; // 005C
        Transliterator t = Transliterator.getInstance("Fullwidth-Halfwidth");
        String output = t.transliterate(input);
        assertEquals("To Halfwidth", expected, output);

        input = "\\"; // 005C
        expected = "\uFF3C"; // FF3C
        Transliterator t2 = t.getInverse();
        output = t2.transliterate(input);
        assertEquals("To FullWidth", expected, output);
    }

    /** Smoke test: requesting the cs-cs_FONIPA transliterator must not throw. */
    public void TestASimple() {
        Transliterator.getInstance("cs-cs_FONIPA");
    }

    /** Set once {@link #register()} has registered the CLDR transforms. */
    boolean registered = false;

    /**
     * Registers the CLDR transforms via {@link CLDRTransforms#registerCldrTransforms};
     * does nothing on second and later calls on the same instance. Progress is written
     * to the log writer only in verbose mode.
     */
    void register() {
        if (!registered) {
            CLDRTransforms.registerCldrTransforms(null, null,
                isVerbose() ? getLogPrintWriter() : null, true);
            registered = true;
        }
    }

    /** Directive keys understood by the data table in {@link #Test1461()}. */
    enum Options {
        transliterator, roundtrip
    }

    /**
     * Builds a legacy transform ID of the form {@code source-target} or
     * {@code source-target/variant}.
     *
     * @param variant optional variant; {@code null} to omit the {@code /variant} part
     */
    private String makeLegacyTransformID(String source, String target, String variant) {
        String id = source + "-" + target;
        return variant != null ? id + "/" + variant : id;
    }

    /**
     * Reports an error if {@code id} contains {@code -t-} (i.e. looks like a BCP47-T
     * identifier) but differs from the language tag produced by round-tripping it
     * through {@link ULocale}.
     *
     * @param file file the ID was read from; used only in the error message
     */
    private void checkTransformID(String id, File file) {
        if (id.indexOf("-t-") > 0) {
            String expected = ULocale.forLanguageTag(id).toLanguageTag();
            if (!id.equals(expected)) {
                errln(file.getName() + ": BCP47-T identifier \"" +
                    id + "\" should be \"" + expected + "\"");
            }
        }
    }

    /**
     * Records that {@code file} defines transform {@code id}; reports an error if a
     * different file has already defined the same ID.
     */
    private void addTransformID(String id, File file, Map<String, File> ids) {
        File oldFile = ids.get(id);
        if (oldFile == null || oldFile.equals(file)) {
            ids.put(id, file);
        } else {
            errln(file.getName() + ": Transform \"" + id +
                "\" already defined in " + oldFile.getName());
        }
    }

    /**
     * Adds every ID declared by one {@code <transform>} element: the legacy
     * source-target ID (plus the reversed one when direction is "both"), and all
     * whitespace-separated entries of the {@code alias} and {@code backwardAlias}
     * attributes.
     *
     * @param file file being read; used for error messages and as the map value
     * @param parts parsed XPath of the element
     * @param element index of the {@code transform} element within {@code parts}
     * @param ids map from transform ID to defining file, updated in place
     */
    private void addTransformIDs(File file, XPathParts parts, int element, Map<String, File> ids) {
        String source = parts.getAttributeValue(element, "source");
        String target = parts.getAttributeValue(element, "target");
        String variant = parts.getAttributeValue(element, "variant");
        String direction = parts.getAttributeValue(element, "direction");

        if (source != null && target != null) {
            if ("forward".equals(direction)) {
                addTransformID(makeLegacyTransformID(source, target, variant), file, ids);
            } else if ("both".equals(direction)) {
                addTransformID(makeLegacyTransformID(source, target, variant), file, ids);
                addTransformID(makeLegacyTransformID(target, source, variant), file, ids);
            }
        }

        String alias = parts.getAttributeValue(element, "alias");
        if (alias != null) {
            for (String id : alias.split("\\s+")) {
                addTransformID(id, file, ids);
            }
        }

        String backwardAlias = parts.getAttributeValue(element, "backwardAlias");
        if (backwardAlias != null) {
            if (!"both".equals(direction)) {
                errln(file.getName() + ": Expected direction=\"both\" " +
                    "when backwardAlias is present");
            }

            for (String id : backwardAlias.split("\\s+")) {
                addTransformID(id, file, ids);
            }
        }
    }

    /**
     * Collects the transform IDs declared by all {@code .xml} files in a directory.
     *
     * @param transformsDirectoryPath directory holding the transform rule files
     * @return map from transform ID to the file defining it; empty (after reporting an
     *     error) when the directory is missing or cannot be listed
     */
    private Map<String, File> getTransformIDs(String transformsDirectoryPath) {
        Map<String, File> ids = new HashMap<>();
        File dir = new File(transformsDirectoryPath);
        if (!dir.exists()) {
            errln("Cannot find transforms directory at " + transformsDirectoryPath);
            return ids;
        }

        // listFiles() returns null when the path is not a directory or an I/O error occurs.
        File[] files = dir.listFiles();
        if (files == null) {
            errln("Cannot list transforms directory at " + transformsDirectoryPath);
            return ids;
        }

        for (File file : files) {
            if (!file.getName().endsWith(".xml")) {
                continue;
            }
            List<Pair<String, String>> data = new ArrayList<>();
            XMLFileReader.loadPathValues(file.getPath(), data, true);
            for (Pair<String, String> entry : data) {
                final String xpath = entry.getFirst();
                if (xpath.startsWith("//supplementalData/transforms/transform[")) {
                    XPathParts parts = XPathParts.getFrozenInstance(xpath);
                    // Element 2 is "transform" in //supplementalData/transforms/transform.
                    addTransformIDs(file, parts, 2, ids);
                }
            }
        }
        return ids;
    }

    /**
     * IDs from the previous release that {@link #TestTransformIDs()} allows to be
     * absent from the current transforms directory.
     */
    final ImmutableSet<String> OK_MISSING_FROM_OLD = ImmutableSet.of("und-Sarb-t-und-ethi",
        "Ethi-Sarb", "und-Ethi-t-und-latn", "Musnad-Ethiopic", "und-Ethi-t-und-sarb",
        "Sarb-Ethi", "Ethiopic-Musnad");

    /**
     * Validates the form of every current transform ID and, in exhaustive mode, checks
     * that no ID from the previous release has disappeared without being whitelisted.
     */
    public void TestTransformIDs() {
        Map<String, File> transforms = getTransformIDs(CLDRPaths.TRANSFORMS_DIRECTORY);
        for (Map.Entry<String, File> entry : transforms.entrySet()) {
            checkTransformID(entry.getKey(), entry.getValue());
        }

        // Only run the rest in exhaustive mode since it requires CLDR_ARCHIVE_DIRECTORY.
        if (getInclusion() <= 5) {
            return;
        }

        Set<String> removedTransforms = new HashSet<>();
        removedTransforms.add("ASCII-Latin"); // http://unicode.org/cldr/trac/ticket/9163

        Map<String, File> oldTransforms = getTransformIDs(CLDRPaths.LAST_TRANSFORMS_DIRECTORY);
        for (Map.Entry<String, File> entry : oldTransforms.entrySet()) {
            String id = entry.getKey();
            if (!transforms.containsKey(id)
                && !removedTransforms.contains(id)
                && !OK_MISSING_FROM_OLD.contains(id)) {
                File oldFile = entry.getValue();
                errln("Missing transform \"" + id +
                    "\"; the previous CLDR release had defined it in " + oldFile.getName());
            }
        }
    }

    /**
     * Table-driven transliteration test. A row whose first cell ends in "=" is a
     * directive ({@code transliterator=} selects the transform, {@code roundtrip=}
     * switches inverse checking on or off); any other row is a source/expected pair.
     */
    public void Test1461() {
        register();

        String[][] tests = {
            { "transliterator=", "Katakana-Latin" },
            { "\u30CF \u30CF\uFF70 \u30CF\uFF9E \u30CF\uFF9F",
                "ha hā ba pa" },
            { "transliterator=", "Hangul-Latin" },
            { "roundtrip=", "true" }, { "갗", "gach" }, { "느", "neu" }, };

        Transliterator transform = null;
        Transliterator inverse = null;
        String id = null;
        boolean roundtrip = false;
        for (String[] items : tests) {
            String source = items[0];
            String target = items[1];
            if (source.endsWith("=")) {
                String key = source.substring(0, source.length() - 1)
                    .toLowerCase(Locale.ENGLISH);
                switch (Options.valueOf(key)) {
                case transliterator:
                    id = target;
                    transform = Transliterator.getInstance(id);
                    inverse = Transliterator.getInstance(id,
                        Transliterator.REVERSE);
                    break;
                case roundtrip:
                    roundtrip = target.toLowerCase(Locale.ENGLISH).charAt(0) == 't';
                    break;
                }
                continue;
            }
            String result = transform.transliterate(source);
            assertEquals(id + ":from " + source, target, result);
            if (roundtrip) {
                String result2 = inverse.transliterate(target);
                assertEquals(id + " (inv): from " + target, source, result2);
            }
        }
    }

    /** Checks that Latin-ASCII maps U+02B9 MODIFIER LETTER PRIME to an ASCII apostrophe. */
    public void Test8921() {
        register();
        Transliterator trans = Transliterator.getInstance("Latin-ASCII");
        assertEquals("Test8921", "Kornil'ev Kirill",
            trans.transliterate("Kornilʹev Kirill"));
    }

    /**
     * Matches {@code <target>-t-<source>[-m0-<mechanism>]}; groups 1, 2 and 3 are the
     * target, source and (optional) mechanism.
     */
    private static final Pattern RFC6497_PATTERN =
        Pattern.compile("([a-zA-Z0-9-]+)-t-([a-zA-Z0-9-]+?)(?:-m0-([a-zA-Z0-9-]+))?");

    // cs-fonipa --> cs_fonipa; und-deva --> deva
    // TODO: Remove this workaround once ICU supports BCP47-T identifiers.
    // http://bugs.icu-project.org/trac/ticket/12599
    /**
     * Converts one BCP47 subtag sequence to the legacy form: hyphens become
     * underscores, and a leading {@code und_} is dropped when exactly four characters
     * follow it.
     */
    private String getLegacyCode(String code) {
        code = code.replace('-', '_');
        if (code.startsWith("und_") && code.length() == 8) {
            code = code.substring(4);
        }
        return code;
    }

    /** Returns the transliterator for {@code id} after mapping it with {@link #getOldTranslitId}. */
    private Transliterator getTransliterator(String id) {
        return Transliterator.getInstance(getOldTranslitId(id));
    }

    /**
     * Maps a BCP47-T transform identifier to the legacy ID passed to ICU.
     *
     * @param id BCP47-T or legacy identifier
     * @return the legacy ID; {@code id} unchanged when it is neither one of the
     *     special-cased identifiers nor matched by {@link #RFC6497_PATTERN}
     */
    private String getOldTranslitId(String id) {
        // TODO: Pass unmodified transform name to ICU, once
        // ICU can handle transform identifiers according to
        // BCP47 Extension T (RFC 6497). The rewriting below
        // is just a temporary workaround, allowing us to use
        // BCP47-T identifiers for naming test data files.
        // http://bugs.icu-project.org/trac/ticket/12599
        if (id.equalsIgnoreCase("und-t-d0-publish")) {
            return "Any-Publishing";
        } else if (id.equalsIgnoreCase("und-t-s0-publish")) {
            return "Publishing-Any";
        } else if (id.equalsIgnoreCase("de-t-de-d0-ascii")) {
            return "de-ASCII";
        } else if (id.equalsIgnoreCase("my-t-my-s0-zawgyi")) {
            return "Zawgyi-my";
        } else if (id.equalsIgnoreCase("my-t-my-d0-zawgyi")) {
            return "my-Zawgyi";
        } else if (id.equalsIgnoreCase("und-t-d0-ascii")) {
            return "Latin-ASCII";
        }

        Matcher rfc6497Matcher = RFC6497_PATTERN.matcher(id);
        if (rfc6497Matcher.matches()) {
            String targetLanguage = getLegacyCode(rfc6497Matcher.group(1));
            String originalLanguage = getLegacyCode(rfc6497Matcher.group(2));
            String mechanism = rfc6497Matcher.group(3);
            // Legacy IDs are source-target, the reverse of the BCP47-T order.
            id = originalLanguage + "-" + targetLanguage;
            if (mechanism != null && !mechanism.isEmpty()) {
                id += "/" + mechanism.replace('-', '_');
            }
        }
        return id;
    }

    /**
     * Runs every {@code *.txt} data file in the test-data "transforms" directory. The
     * file name (minus extension) names the transform; each non-comment line holds
     * {@code source<TAB>expected}. Afterwards, warns about transliterators without a
     * data file and data files whose transliterator ID is not among the available IDs.
     */
    public void TestData() {
        register();
        try {
            File fileDirectory = new File(CLDRPaths.TEST_DATA + "transforms/");
            String fileDirectoryName = PathUtilities.getNormalizedPathString(fileDirectory);
            assertTrue(fileDirectoryName, fileDirectory.exists());

            logln("Testing files in: " + fileDirectoryName);

            // list() returns null when the path is not a directory or cannot be read.
            String[] fileNames = fileDirectory.list();
            if (fileNames == null) {
                errln("Cannot list test data directory " + fileDirectoryName);
                return;
            }

            Set<String> foundTranslitsLower = new TreeSet<>();

            for (String file : fileNames) {
                if (!file.endsWith(".txt") || file.startsWith("_readme")) {
                    continue;
                }
                logln("Testing file: " + file);
                String transName = file.substring(0, file.length() - 4);
                if (transName.equals("ka-Latn-t-ka-m0-bgn")) {
                    logKnownIssue("cldrbug:10566", "Jenkins build failing on translit problem");
                    continue; // failures like the following need to be fixed first.
                    // Error: (TestTransforms.java:434) : ka-Latn-t-ka-m0-bgn 2 Transform უფლება: expected "up’leba", got "upleba"
                }

                Transliterator trans = getTransliterator(transName);
                String id = trans.getID().toLowerCase(Locale.ROOT);
                foundTranslitsLower.add(id);

                // try-with-resources: the reader is closed even if a line fails.
                try (BufferedReader in = FileUtilities.openUTF8Reader(fileDirectoryName, file)) {
                    int counter = 0;
                    String line;
                    while ((line = in.readLine()) != null) {
                        line = line.trim();
                        counter += 1;
                        if (line.isEmpty() || line.startsWith("#")) {
                            continue;
                        }
                        String[] parts = line.split("\t");
                        if (parts.length < 2) {
                            errln(transName + " " + counter
                                + " Malformed line, expected <source>TAB<expected>: " + line);
                            continue;
                        }
                        String source = parts[0];
                        String expected = parts[1];
                        String result = trans.transform(source);
                        assertEquals(transName + " " + counter + " Transform "
                            + source, expected, result);
                    }
                }
            }
            Set<String> allTranslitsLower = oldEnumConvertLower(
                Transliterator.getAvailableIDs(), new TreeSet<>());
            // see which are missing tests
            for (String s : allTranslitsLower) {
                if (!foundTranslitsLower.contains(s)) {
                    warnln("Translit with no test file:\t" + s);
                }
            }

            // all must be superset of found tests
            for (String s : foundTranslitsLower) {
                if (!allTranslitsLower.contains(s)) {
                    warnln("Test file with no translit:\t" + s);
                }
            }

        } catch (IOException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /** Drains {@code source} into {@code target} and returns {@code target}. */
    private <T, U extends Collection<T>> U oldEnumConvert(Enumeration<T> source, U target) {
        while (source.hasMoreElements()) {
            target.add(source.nextElement());
        }
        return target;
    }

    /**
     * Drains {@code source} into {@code target}, lower-casing each element with
     * {@link Locale#ROOT}, and returns {@code target}.
     */
    private <U extends Collection<String>> U oldEnumConvertLower(Enumeration<String> source, U target) {
        while (source.hasMoreElements()) {
            target.add(source.nextElement().toLowerCase(Locale.ROOT));
        }
        return target;
    }

    /** Casing operations; the constant name is the suffix of the {@code <locale>-<Casing>} transform ID. */
    enum Casing {
        Upper, Title, Lower
    }

    /**
     * Checks the locale-specific casing transforms for Greek, Turkish, Azerbaijani,
     * Lithuanian and Dutch against literal expectations and against
     * {@link UCharacter}'s locale-sensitive case mappings.
     */
    public void TestCasing() {
        register();
        String greekSource = "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ";
        checkString("el", Casing.Title,
            "Οδός Οδός Σο Σο Oς Ος Σ Ἕξ", greekSource, true);
        checkString("el", Casing.Lower,
            "οδός οδός σο σο oς ος σ ἕξ", greekSource, true);
        checkString("el", Casing.Upper,
            "ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", greekSource, true); // now true due to ICU #5456

        String turkishSource = "Isiİ İsıI";
        checkString("tr", Casing.Title, "Isii İsıı", turkishSource, true);
        checkString("tr", Casing.Lower, "ısii isıı", turkishSource, true);
        checkString("tr", Casing.Upper, "ISİİ İSII", turkishSource, true);
        checkString("az", Casing.Title, "Isii İsıı", turkishSource, true);
        checkString("az", Casing.Lower, "ısii isıı", turkishSource, true);
        checkString("az", Casing.Upper, "ISİİ İSII", turkishSource, true);

        String lithuanianSource = "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 xi\u0307\u0308 xj\u0307\u0308 x\u012F\u0307\u0308 xi\u0307\u0300 xi\u0307\u0301 xi\u0307\u0303 XI X\u00CF XJ XJ\u0308 X\u012E X\u012E\u0308";
        // The following test was formerly skipped with
        // !logKnownIssue("11094", "Fix ICU4J UCharacter.toTitleCase/toLowerCase for lt").
        // However [https://unicode-org.atlassian.net/browse/ICU-11094] is supposedly
        // fixed in the version of ICU4J currently in CLDR, but removing the logKnownIssue
        // to execute the test results in test failures, mainly for i\u0307\u0308.
        // So I am changing the logKnownIssue to reference a CLDR ticket about
        // investigating the test (it may be wrong).
        if (!logKnownIssue("cldrbug:13313",
            "Investigate the Lithuanian casing test, it may be wrong")) {
            checkString(
                "lt",
                Casing.Title,
                "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 Xi\u0307\u0308 Xj\u0307\u0308 X\u012F\u0307\u0308 Xi\u0307\u0300 Xi\u0307\u0301 Xi\u0307\u0303 Xi Xi\u0307\u0308 Xj Xj\u0307\u0308 X\u012F X\u012F\u0307\u0308",
                lithuanianSource, true);
            checkString(
                "lt",
                Casing.Lower,
                "i i\u0307\u0308 j j\u0307\u0308 \u012F \u012F\u0307\u0308 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 xi\u0307\u0308 xj\u0307\u0308 x\u012F\u0307\u0308 xi\u0307\u0300 xi\u0307\u0301 xi\u0307\u0303 xi xi\u0307\u0308 xj xj\u0307\u0308 x\u012F x\u012F\u0307\u0308",
                lithuanianSource, true);
        }
        checkString(
            "lt",
            Casing.Upper,
            "I \u00CF J J\u0308 \u012E \u012E\u0308 \u00CC \u00CD \u0128 X\u00CF XJ\u0308 X\u012E\u0308 X\u00CC X\u00CD X\u0128 XI X\u00CF XJ XJ\u0308 X\u012E X\u012E\u0308",
            lithuanianSource, true);

        String dutchSource = "IJKIJ ijkij IjkIj";
        checkString("nl", Casing.Title,
            "IJkij IJkij IJkij", dutchSource, true);
    }

    /**
     * Checks the {@code <locale>-<casing>} transform against a literal expectation and
     * against the NFC-normalized result of the matching {@link UCharacter} case mapping.
     *
     * @param locale locale ID used both in the transform ID and for {@link ULocale}
     * @param casing which case mapping to test
     * @param expected expected transform output for {@code source}
     * @param source input text
     * @param sameAsSpecialCasing whether the transform output must equal ({@code true})
     *     or differ from ({@code false}) the {@code UCharacter} result
     * @return the transliterator that was tested
     */
    private Transliterator checkString(String locale, Casing casing,
        String expected, String source, boolean sameAsSpecialCasing) {
        Transliterator translit = Transliterator.getInstance(locale + "-"
            + casing);
        String result = checkString(locale, expected, source, translit);
        ULocale ulocale = new ULocale(locale);
        String specialCasing;
        // UCharacter.toXxxCase doesn't normalize, Transliterator does
        Normalizer2 normNFC = Normalizer2.getNFCInstance();
        switch (casing) {
        case Upper:
            specialCasing = normNFC.normalize(UCharacter.toUpperCase(ulocale,
                source));
            break;
        case Title:
            specialCasing = normNFC.normalize(UCharacter.toTitleCase(ulocale,
                source, null));
            break;
        case Lower:
            specialCasing = normNFC.normalize(UCharacter.toLowerCase(ulocale,
                source));
            break;
        default:
            throw new IllegalArgumentException();
        }
        String label = locale + "-" + casing + " Vs SpecialCasing";
        if (sameAsSpecialCasing) {
            if (!assertEquals(label, specialCasing, result)) {
                showFirstDifference("Special: ", specialCasing, "Transform: ",
                    result);
            }
        } else {
            assertNotEquals(label, specialCasing, result);
        }
        return translit;
    }

    /**
     * Transforms {@code source} with {@code translit} and asserts the result equals
     * {@code expected}; on mismatch the transliterator's rules are dumped.
     *
     * @return the transformed text
     */
    private String checkString(String locale, String expected, String source,
        Transliterator translit) {
        String transformed = translit.transform(source);
        if (!assertEquals(locale, expected, transformed)) {
            showTransliterator(translit);
        }
        return transformed;
    }

    /**
     * Reports, via {@code errln}, the common prefix of {@code a} and {@code b} and the
     * hex of each remainder from the first differing {@code char}; reports
     * "different length" when one string is a prefix of the other.
     */
    private void showFirstDifference(String titleA, String a, String titleB,
        String b) {
        StringBuilder buffer = new StringBuilder();
        int limit = Math.min(a.length(), b.length());
        for (int i = 0; i < limit; ++i) {
            char aChar = a.charAt(i);
            char bChar = b.charAt(i);
            if (aChar == bChar) {
                buffer.append(aChar);
            } else {
                errln("\t" + buffer + "\n\t\t" + titleA + "\t"
                    + Utility.hex(a.substring(i)) + "\n\t\t" + titleB
                    + "\t" + Utility.hex(b.substring(i)));
                return;
            }
        }
        errln("different length");
    }

    /** Delegates to {@code org.unicode.cldr.test.TestTransforms.showTransliterator}. */
    private void showTransliterator(Transliterator t) {
        org.unicode.cldr.test.TestTransforms.showTransliterator("", t, 999);
    }

    /** Regression test: 賈 must be romanized as "jiǎ". */
    public void Test9925() {
        register();
        Transliterator pinyin = getTransliterator("und-Latn-t-und-hani");
        assertEquals("賈 bug", "jiǎ", pinyin.transform("賈"));
    }

    /** Checks selected Hiragana-Katakana mappings. */
    public void TestHiraKata() { // for CLDR-13127 and ...
        register();
        Transliterator hiraKata = getTransliterator("Hiragana-Katakana");
        // Argument order is (message, expected, actual).
        assertEquals("Hira-Kata", "゛゜ ヷ ヨリ", hiraKata.transform("゛゜ わ゙ ゟ"));
    }

    /** Some tests for the transformation of Zawgyi font encoding to Unicode Burmese. */
    public void TestZawgyiToUnicode10899() {
        register();
        Transliterator z2u = getTransliterator("my-t-my-s0-zawgyi");

        String z1 =
            "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u1039\u1031\u1010\u103C";
        String expected =
            "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u103A\u1010\u103D\u1031";

        String actual = z2u.transform(z1);

        assertEquals("z1 to u1", expected, actual);

        String z2 = "တကယ္ဆို အျငိႈးေတြမဲ႔ေသာလမ္းေသာလမ္းမွာ တိုႈျပန္ဆံုျကတဲ႔အခါ ";
        expected = "တကယ်ဆို အငြှိုးတွေမဲ့သောလမ်းသောလမ်းမှာ တှိုပြန်ဆုံကြတဲ့အခါ ";
        actual = z2u.transform(z2);
        assertEquals("z2 to u2", expected, actual);

        String z3 = "ျပန္လမ္းမဲ့ကၽြန္းအပိုင္း၄";
        expected = "ပြန်လမ်းမဲ့ကျွန်းအပိုင်း၎";
        actual = z2u.transform(z3);
        assertEquals("z3 to u3", expected, actual);
    }

    /** Some tests for the transformation from Unicode to Zawgyi font encoding. */
    public void TestUnicodeToZawgyi111107() {
        register();
        Transliterator u2z = getTransliterator("my-t-my-d0-zawgyi");

        String expected =
            "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u1039\u1031\u1010\u103C";
        String u1 =
            "\u1021\u102C\u100F\u102C\u1015\u102D\u102F\u1004\u103A\u1010\u103D\u1031";

        String actual = u2z.transform(u1);

        assertEquals("u1 to z1", expected, actual);

        expected = "တကယ္ဆို အၿငႇိဳးေတြမဲ့ေသာလမ္းေသာလမ္းမွာ တိႈျပန္ဆံုၾကတဲ့အခါ ";
        String u2 = "တကယ်ဆို အငြှိုးတွေမဲ့သောလမ်းသောလမ်းမှာ တှိုပြန်ဆုံကြတဲ့အခါ ";
        actual = u2z.transform(u2);
        assertEquals("u2 to z2", expected, actual);

        expected = "ျပန္လမ္းမဲ့ကြၽန္းအပိုင္း၄";
        String u3 = "ပြန်လမ်းမဲ့ကျွန်းအပိုင်း၎";
        actual = u2z.transform(u3);
        assertEquals("u3 to z3", expected, actual);
    }
}