• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** */
2 package org.unicode.cldr.util;
3 
4 import com.google.common.collect.BiMap;
5 import com.google.common.collect.HashBiMap;
6 import com.google.common.collect.HashMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.Multimap;
9 import com.google.common.collect.Multimaps;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.lang.UScript;
12 import com.ibm.icu.text.RuleBasedTransliterator;
13 import com.ibm.icu.text.Transliterator;
14 import com.ibm.icu.text.UnicodeFilter;
15 import com.ibm.icu.util.ICUUncheckedIOException;
16 import java.io.File;
17 import java.io.IOException;
18 import java.io.Writer;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.HashSet;
23 import java.util.LinkedHashSet;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Map;
27 import java.util.Map.Entry;
28 import java.util.Set;
29 import java.util.TreeMap;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33 import java.util.stream.Collectors;
34 import org.unicode.cldr.tool.LikelySubtags;
35 import org.unicode.cldr.util.DiscreteComparator.Builder;
36 
37 public class CLDRTransforms {
38 
39     public static final String TRANSFORM_DIR = (CLDRPaths.COMMON_DIRECTORY + "transforms/");
40 
41     static final CLDRTransforms SINGLETON = new CLDRTransforms();
42 
43     private static final boolean PARANOID = true;
44 
getInstance()45     public static CLDRTransforms getInstance() {
46         return SINGLETON;
47     }
48 
getShowProgress()49     public Appendable getShowProgress() {
50         return showProgress;
51     }
52 
setShowProgress(Appendable showProgress)53     public CLDRTransforms setShowProgress(Appendable showProgress) {
54         this.showProgress = showProgress;
55         return this;
56     }
57 
58     final Set<String> overridden = new HashSet<>();
59     // final DependencyOrder dependencyOrder = new DependencyOrder();
60 
61     //    static public class RegexFindFilenameFilter implements FilenameFilter {
62     //        Matcher matcher;
63     //
64     //        public RegexFindFilenameFilter(Matcher filter) {
65     //            matcher = filter;
66     //        }
67     //
68     //        @Override
69     //        public boolean accept(File dir, String name) {
70     //            return matcher.reset(name).find();
71     //        }
72     //    }
73 
74     /**
75      * @param dir TODO
76      * @param namesMatchingRegex TODO
77      * @param showProgress null if no progress needed
78      * @param skipDashTIds TODO
79      * @return
80      */
registerCldrTransforms( String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds)81     public static void registerCldrTransforms(
82             String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds) {
83         CLDRTransforms r = getInstance();
84         if (dir == null) {
85             dir = TRANSFORM_DIR;
86         }
87         // reorder to preload some
88         r.showProgress = showProgress;
89         Set<String> ordered = getFileRegistrationOrder(dir);
90 
91         if (namesMatchingRegex != null) {
92             Matcher filter = PatternCache.get(namesMatchingRegex).matcher("");
93             ordered =
94                     ordered.stream()
95                             .filter(x -> filter.reset(x).matches())
96                             .collect(Collectors.toCollection(LinkedHashSet::new));
97             //            r.deregisterIcuTransliterators(filter);
98             //            files = Arrays.asList(new File(TRANSFORM_DIR).list(new
99             // RegexFindFilenameFilter(filter)));
100             //            ordered = r.dependencyOrder.getOrderedItems(files, filter, true);
101         }
102 
103         // System.out.println(ordered);
104         for (String cldrFileName : ordered) {
105             r.registerTransliteratorsFromXML(
106                     dir, cldrFileName, Collections.emptySet(), keepDashTIds);
107         }
108         Transliterator.registerAny(); // do this last!
109     }
110 
getAvailableIds()111     public static List<String> getAvailableIds() {
112         return Arrays.asList(new File(TRANSFORM_DIR).list());
113     }
114 
getOverriddenTransliterators()115     public Set<String> getOverriddenTransliterators() {
116         return Collections.unmodifiableSet(overridden);
117     }
118 
119     static Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
120 
getInstance(String id)121     public Transliterator getInstance(String id) {
122         if (!overridden.contains(id)) {
123             throw new IllegalArgumentException("No overriden transform for " + id);
124         }
125         return Transliterator.getInstance(id);
126     }
127 
128     public static Pattern TRANSFORM_ID_PATTERN = PatternCache.get("(.+)-([^/]+)(/(.*))?");
129 
getReverseInstance(String id)130     public Transliterator getReverseInstance(String id) {
131         Matcher matcher = TRANSFORM_ID_PATTERN.matcher(id);
132         if (!matcher.matches()) {
133             throw new IllegalArgumentException("**No transform for " + id);
134         }
135         return getInstance(
136                 matcher.group(2)
137                         + "-"
138                         + matcher.group(1)
139                         + (matcher.group(4) == null ? "" : "/" + matcher.group(4)));
140     }
141 
142     private BiMap<String, String> displayNameToId = HashBiMap.create();
143 
getDisplayNameToId()144     public BiMap<String, String> getDisplayNameToId() {
145         return displayNameToId;
146     }
147 
addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo)148     private void addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo) {
149         displayNameToId.put(directionInfo.getDisplayId(), directionInfo.toString());
150     }
151 
registerTransliteratorsFromXML( String dir, String cldrFileName, Set<String> cantSkip, boolean keepDashTIds)152     public String registerTransliteratorsFromXML(
153             String dir, String cldrFileName, Set<String> cantSkip, boolean keepDashTIds) {
154         ParsedTransformID directionInfo = new ParsedTransformID();
155         String ruleString = getIcuRulesFromXmlFile(dir, cldrFileName, directionInfo);
156 
157         String id = directionInfo.getId();
158         addDisplayNameToId(displayNameToId, directionInfo);
159 
160         if (directionInfo.getDirection() == Direction.both
161                 || directionInfo.getDirection() == Direction.forward) {
162             for (String alias : directionInfo.getAliases()) {
163                 if (!keepDashTIds && alias.contains("-t-")) {
164                     continue;
165                 }
166                 Transliterator.unregister(alias);
167                 Transliterator.registerAlias(alias, id);
168             }
169             internalRegister(id, ruleString, Transliterator.FORWARD);
170         }
171         if (directionInfo.getDirection() == Direction.both
172                 || directionInfo.getDirection() == Direction.backward) {
173             for (String alias : directionInfo.getBackwardAliases()) {
174                 if (!keepDashTIds && alias.contains("-t-")) {
175                     continue;
176                 }
177                 Transliterator.unregister(alias);
178                 Transliterator.registerAlias(alias, directionInfo.getBackwardId());
179             }
180             internalRegister(id, ruleString, Transliterator.REVERSE);
181         }
182         return id;
183     }
184 
185     /**
186      * Return Icu rules, and the direction info
187      *
188      * @param dir TODO
189      * @param cldrFileName
190      * @param directionInfo
191      * @return
192      */
getIcuRulesFromXmlFile( String dir, String cldrFileName, ParsedTransformID directionInfo)193     public static String getIcuRulesFromXmlFile(
194             String dir, String cldrFileName, ParsedTransformID directionInfo) {
195         final MyHandler myHandler = new MyHandler(cldrFileName, directionInfo);
196         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
197         xfr.read(
198                 dir + cldrFileName,
199                 XMLFileReader.CONTENT_HANDLER | XMLFileReader.ERROR_HANDLER,
200                 true);
201         return myHandler.getRules();
202     }
203 
internalRegister(String id, String ruleString, int direction)204     private void internalRegister(String id, String ruleString, int direction) {
205         if (direction == Transliterator.REVERSE) {
206             id = ParsedTransformID.reverse(id);
207         }
208         internalRegisterNoReverseId(id, ruleString, direction);
209     }
210 
internalRegisterNoReverseId(String id, String ruleString, int direction)211     private void internalRegisterNoReverseId(String id, String ruleString, int direction) {
212         try {
213             Transliterator t = Transliterator.createFromRules(id, ruleString, direction);
214             overridden.add(id);
215             Transliterator oldTranslit = null;
216             if (showProgress != null) {
217                 try {
218                     oldTranslit = Transliterator.getInstance(id);
219                 } catch (Exception e) {
220                 }
221             }
222             Transliterator.unregister(id);
223             Transliterator.registerInstance(t);
224 
225             if (PARANOID) { // for paranoid testing
226                 String r1 =
227                         CLDRTransforms.showTransliterator("", t, 9999, new StringBuilder())
228                                 .toString();
229                 Transliterator t2 = Transliterator.getInstance(id);
230                 String r2 =
231                         CLDRTransforms.showTransliterator("", t2, 9999, new StringBuilder())
232                                 .toString();
233                 if (!r1.equals(r2)) {
234                     throw new IllegalArgumentException(
235                             "Rules unequal\n" + ruleString + "$$$\n$$$" + r2);
236                 }
237             }
238             // verifyNullFilter("halfwidth-fullwidth");
239             if (showProgress != null) {
240                 append(
241                         "Registered new Transliterator: "
242                                 + id
243                                 + (oldTranslit == null ? "" : "\told:\t" + oldTranslit.getID())
244                                 + '\n');
245                 if (id.startsWith("el-")) {
246                     CLDRTransforms.showTransliterator("", t, 999);
247                     Transliterator t2 = Transliterator.getInstance(id);
248                     CLDRTransforms.showTransliterator("", t2, 999);
249                 }
250             }
251         } catch (RuntimeException e) {
252             if (showProgress != null) {
253                 e.printStackTrace();
254                 append(
255                         "Couldn't register new Transliterator: "
256                                 + id
257                                 + "\t"
258                                 + e.getMessage()
259                                 + '\n');
260             } else {
261                 throw (IllegalArgumentException)
262                         new IllegalArgumentException("Couldn't register new Transliterator: " + id)
263                                 .initCause(e);
264             }
265         }
266     }
267 
268     Appendable showProgress;
269 
append(String string)270     private void append(String string) {
271         try {
272             if (showProgress == null) {
273                 return;
274             }
275             showProgress.append(string);
276             if (showProgress instanceof Writer) {
277                 ((Writer) showProgress).flush();
278             }
279         } catch (IOException e) {
280             throw new ICUUncheckedIOException(e);
281         }
282     }
283 
appendln(String s)284     private void appendln(String s) {
285         append(s + "\n");
286     }
287 
288     // ===================================
289 
290     //    @SuppressWarnings("deprecation")
291     //    public void registerFromIcuFormatFiles(String directory) throws IOException {
292     //
293     ////        deregisterIcuTransliterators((Matcher) null);
294     //
295     //        Matcher getId = PatternCache.get("\\s*(\\S*)\\s*\\{\\s*").matcher("");
296     //        Matcher getSource =
297     // PatternCache.get("\\s*(\\S*)\\s*\\{\\s*\\\"(.*)\\\".*").matcher("");
298     //        Matcher translitID = PatternCache.get("([^-]+)-([^/]+)+(?:[/](.+))?").matcher("");
299     //
300     //        Map<String, String> fixedIDs = new TreeMap<>();
301     //        Set<String> oddIDs = new TreeSet<>();
302     //
303     //        File dir = new File(directory);
304     //        // get the list of files to take, and their directions
305     //        BufferedReader input = FileUtilities.openUTF8Reader(directory, "root.txt");
306     //        String id = null;
307     //        String filename = null;
308     //        Map<String, String> aliasMap = new LinkedHashMap<>();
309     //
310     //        // deregisterIcuTransliterators();
311     //
312     //        // do first, since others depend on theseregisterFromIcuFile
313     //        /**
314     //         * Special aliases.
315     //         * Tone-Digit {
316     //         * alias {"Pinyin-NumericPinyin"}
317     //         * }
318     //         * Digit-Tone {
319     //         * alias {"NumericPinyin-Pinyin"}
320     //         * }
321     //         */
322     //        // registerFromIcuFile("Latin-ConjoiningJamo", directory, null);
323     //        // registerFromIcuFile("Pinyin-NumericPinyin", directory, null);
324     //        // Transliterator.registerAlias("Tone-Digit", "Pinyin-NumericPinyin");
325     //        // Transliterator.registerAlias("Digit-Tone", "NumericPinyin-Pinyin");
326     //        // registerFromIcuFile("Fullwidth-Halfwidth", directory, null);
327     //        // registerFromIcuFile("Hiragana-Katakana", directory, null);
328     //        // registerFromIcuFile("Latin-Katakana", directory, null);
329     //        // registerFromIcuFile("Hiragana-Latin", directory, null);
330     //
331     //        while (true) {
332     //            String line = input.readLine();
333     //            if (line == null) break;
334     //            line = line.trim();
335     //            if (line.startsWith("\uFEFF")) {
336     //                line = line.substring(1);
337     //            }
338     //            if (line.startsWith("TransliteratorNamePattern")) break; // done
339     //            // if (line.indexOf("Ethiopic") >= 0) {
340     //            // appendln("Skipping Ethiopic");
341     //            // continue;
342     //            // }
343     //            if (getId.reset(line).matches()) {
344     //                String temp = getId.group(1);
345     //                if (!temp.equals("file") && !temp.equals("internal")) id = temp;
346     //                continue;
347     //            }
348     //            if (getSource.reset(line).matches()) {
349     //                String operation = getSource.group(1);
350     //                String source = getSource.group(2);
351     //                if (operation.equals("alias")) {
352     //                    aliasMap.put(id, source);
353     //                    checkIdFix(id, fixedIDs, oddIDs, translitID);
354     //                    id = null;
355     //                } else if (operation.equals("resource:process(transliterator)")) {
356     //                    filename = source;
357     //                } else if (operation.equals("direction")) {
358     //                    try {
359     //                        if (id == null || filename == null) {
360     //                            // appendln("skipping: " + line);
361     //                            continue;
362     //                        }
363     //                        if (filename.indexOf("InterIndic") >= 0 && filename.indexOf("Latin")
364     // >= 0) {
365     //                            // append("**" + id);
366     //                        }
367     //                        checkIdFix(id, fixedIDs, oddIDs, translitID);
368     //
369     //                        final int direction = source.equals("FORWARD") ?
370     // Transliterator.FORWARD
371     //                            : Transliterator.REVERSE;
372     //                        registerFromIcuFile(id, directory, filename, direction);
373     //
374     //                        verifyNullFilter("halfwidth-fullwidth");
375     //
376     //                        id = null;
377     //                        filename = null;
378     //                    } catch (RuntimeException e) {
379     //                        throw (RuntimeException) new IllegalArgumentException("Failed with " +
380     // filename + ", " + source)
381     //                        .initCause(e);
382     //                    }
383     //                } else {
384     //                    append(dir + "root.txt unhandled line:" + line);
385     //                }
386     //                continue;
387     //            }
388     //            String trimmed = line.trim();
389     //            if (trimmed.equals("")) continue;
390     //            if (trimmed.equals("}")) continue;
391     //            if (trimmed.startsWith("//")) continue;
392     //            throw new IllegalArgumentException("Unhandled:" + line);
393     //        }
394     //
395     //        final Set<String> rawIds = idToRules.keySet();
396     //        Set<String> ordered = dependencyOrder.getOrderedItems(rawIds, null, false);
397     //        ordered.retainAll(rawIds); // since we are in ID space, kick out anything that isn't
398     //
399     //        for (String id2 : ordered) {
400     //            RuleDirection stuff = idToRules.get(id2);
401     //            internalRegisterNoReverseId(id2, stuff.ruleString, stuff.direction);
402     //            verifyNullFilter("halfwidth-fullwidth"); // TESTING
403     //        }
404     //
405     //        for (Iterator<String> it = aliasMap.keySet().iterator(); it.hasNext();) {
406     //            id = it.next();
407     //            String source = aliasMap.get(id);
408     //            Transliterator.unregister(id);
409     //            Transliterator t = Transliterator.createFromRules(id, "::" + source + ";",
410     // Transliterator.FORWARD);
411     //            Transliterator.registerInstance(t);
412     //            // verifyNullFilter("halfwidth-fullwidth");
413     //            appendln("Registered new Transliterator Alias: " + id);
414     //
415     //        }
416     //        appendln("Fixed IDs");
417     //        for (Iterator<String> it = fixedIDs.keySet().iterator(); it.hasNext();) {
418     //            String id2 = it.next();
419     //            appendln("\t" + id2 + "\t" + fixedIDs.get(id2));
420     //        }
421     //        appendln("Odd IDs");
422     //        for (Iterator<String> it = oddIDs.iterator(); it.hasNext();) {
423     //            String id2 = it.next();
424     //            appendln("\t" + id2);
425     //        }
426     //        Transliterator.registerAny(); // do this last!
427     //    }
428 
429     Map<String, RuleDirection> idToRules = new TreeMap<>();
430 
431     private class RuleDirection {
432         String ruleString;
433         int direction;
434 
RuleDirection(String ruleString, int direction)435         public RuleDirection(String ruleString, int direction) {
436             super();
437             this.ruleString = ruleString;
438             this.direction = direction;
439         }
440     }
441 
registerFromIcuFile(String id, String directory, String filename, int direction)442     private void registerFromIcuFile(String id, String directory, String filename, int direction) {
443         if (filename == null) {
444             filename = id.replace("-", "_").replace("/", "_") + ".txt";
445         }
446         String ruleString = CldrUtility.getText(directory, filename);
447         idToRules.put(id, new RuleDirection(ruleString, direction));
448     }
449 
450     // private void registerFromIcuFile(String id, String dir, String filename) {
451     // registerFromIcuFile(id, dir, filename, Transliterator.FORWARD);
452     // registerFromIcuFile(id, dir, filename, Transliterator.REVERSE);
453     // }
454 
checkIdFix( String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID)455     public void checkIdFix(
456             String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID) {
457         if (fixedIDs.containsKey(id)) return;
458         if (!translitID.reset(id).matches()) {
459             appendln("Can't fix: " + id);
460             fixedIDs.put(id, "?" + id);
461             return;
462         }
463         String source1 = translitID.group(1);
464         String target1 = translitID.group(2);
465         String variant = translitID.group(3);
466         String source = fixID(source1);
467         String target = fixID(target1);
468         if (!source1.equals(source)) {
469             fixedIDs.put(source1, source);
470         }
471         if (!target1.equals(target)) {
472             fixedIDs.put(target1, target);
473         }
474         if (variant != null) {
475             oddIDs.add("variant: " + variant);
476         }
477     }
478 
fixID(String source)479     static String fixID(String source) {
480         return source; // for now
481     }
482 
483     //    public void deregisterIcuTransliterators(Matcher filter) {
484     //        // Remove all of the current registrations
485     //        // first load into array, so we don't get sync problems.
486     //        List<String> rawAvailable = new ArrayList<>();
487     //        for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();)
488     // {
489     //            final String id = en.nextElement();
490     //            if (filter != null && !filter.reset(id).matches()) {
491     //                continue;
492     //            }
493     //            rawAvailable.add(id);
494     //        }
495     //
496     //        // deregisterIcuTransliterators(rawAvailable);
497     //
498     //        Set<String> available = dependencyOrder.getOrderedItems(rawAvailable, filter, false);
499     //        List<String> reversed = new LinkedList<>();
500     //        for (String item : available) {
501     //            reversed.add(0, item);
502     //        }
503     //        // available.retainAll(rawAvailable); // remove the items we won't touch anyway
504     //        // rawAvailable.removeAll(available); // now the ones whose order doesn't matter
505     //        // deregisterIcuTransliterators(rawAvailable);
506     //        deregisterIcuTransliterators(reversed);
507     //
508     //        for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();)
509     // {
510     //            String oldId = en.nextElement();
511     //            append("Retaining: " + oldId + "\n");
512     //        }
513     //    }
514     //
515     //    public void deregisterIcuTransliterators(Collection<String> available) {
516     //        for (String oldId : available) {
517     //            Transliterator t;
518     //            try {
519     //                t = Transliterator.getInstance(oldId);
520     //            } catch (IllegalArgumentException e) {
521     //                if (e.getMessage().startsWith("Illegal ID")) {
522     //                    continue;
523     //                }
524     //                append("Failure with: " + oldId);
525     //                t = Transliterator.getInstance(oldId);
526     //                throw e;
527     //            } catch (RuntimeException e) {
528     //                append("Failure with: " + oldId);
529     //                t = Transliterator.getInstance(oldId);
530     //                throw e;
531     //            }
532     //            String className = t.getClass().getName();
533     //            if (className.endsWith(".CompoundTransliterator")
534     //                || className.endsWith(".RuleBasedTransliterator")
535     //                || className.endsWith(".AnyTransliterator")) {
536     //                appendln("REMOVING: " + oldId);
537     //                Transliterator.unregister(oldId);
538     //            } else {
539     //                appendln("Retaining: " + oldId + "\t\t" + className);
540     //            }
541     //        }
542     //    }
543 
544     public enum Direction {
545         backward,
546         both,
547         forward
548     }
549 
550     public enum Visibility {
551         external,
552         internal
553     }
554 
555     public static class ParsedTransformID {
556         public String source = "Any";
557         public String target = "Any";
558         public String variant;
559         protected String[] aliases = {};
560         protected String[] backwardAliases = {};
561         protected Direction direction = null;
562         protected Visibility visibility;
563 
getId()564         public String getId() {
565             return getSource()
566                     + "-"
567                     + getTarget()
568                     + (getVariant() == null ? "" : "/" + getVariant());
569         }
570 
getDisplayId()571         public String getDisplayId() {
572             return getDisplaySource()
573                     + "-"
574                     + getDisplayTarget()
575                     + (getVariant() == null ? "" : "/" + getDisplayVariant());
576         }
577 
getDisplayVariant()578         private String getDisplayVariant() {
579             return getVariant();
580         }
581 
getDisplayTarget()582         private String getDisplayTarget() {
583             return getDisplaySourceOrTarget(getTarget());
584         }
585 
getDisplaySource()586         private String getDisplaySource() {
587             return getDisplaySourceOrTarget(getSource());
588         }
589 
getDisplaySourceOrTarget(String sourceOrTarget)590         private String getDisplaySourceOrTarget(String sourceOrTarget) {
591             int uscript = UScript.getCodeFromName(sourceOrTarget);
592             if (uscript >= 0) {
593                 return UScript.getName(uscript);
594             }
595             if (sourceOrTarget.contains("FONIPA")) {
596                 return "IPA";
597             }
598             if (sourceOrTarget.equals("InterIndic")) {
599                 return "Indic";
600             }
601             try {
602                 String name = CLDRConfig.getInstance().getEnglish().getName(sourceOrTarget);
603                 return name;
604             } catch (Exception e) {
605                 return sourceOrTarget;
606             }
607         }
608 
609         static final LikelySubtags likely = new LikelySubtags();
610 
getScriptCode(String sourceOrTarget)611         public static String getScriptCode(String sourceOrTarget) {
612             int uscript = UScript.getCodeFromName(sourceOrTarget);
613             if (uscript >= 0) {
614                 return UScript.getShortName(uscript);
615             }
616             if (sourceOrTarget.contains("FONIPA")) {
617                 return "Ipa0";
618             }
619             if (sourceOrTarget.equals("InterIndic")) {
620                 return "Ind0";
621             }
622             try {
623                 String max = likely.maximize(sourceOrTarget);
624                 return max == null ? null : new LanguageTagParser().set(max).getScript();
625             } catch (Exception e) {
626                 return null;
627             }
628         }
629 
getBackwardId()630         public String getBackwardId() {
631             return getTarget()
632                     + "-"
633                     + getSource()
634                     + (getVariant() == null ? "" : "/" + getVariant());
635         }
636 
ParsedTransformID()637         public ParsedTransformID() {}
638 
set( String source, String target, String variant, Direction direction)639         public ParsedTransformID set(
640                 String source, String target, String variant, Direction direction) {
641             this.source = source;
642             this.target = target;
643             this.variant = variant;
644             this.direction = direction;
645             return this;
646         }
647 
set(String id)648         public ParsedTransformID set(String id) {
649             variant = null;
650             int pos = id.indexOf('-');
651             if (pos < 0) {
652                 source = "Any";
653                 target = id;
654                 return this;
655             }
656             source = id.substring(0, pos);
657             int pos2 = id.indexOf('/', pos);
658             if (pos2 < 0) {
659                 target = id.substring(pos + 1);
660                 return this;
661             }
662             target = id.substring(pos + 1, pos2);
663             variant = id.substring(pos2 + 1);
664             return this;
665         }
666 
reverse()667         public ParsedTransformID reverse() {
668             String temp = source;
669             source = target;
670             target = temp;
671             return this;
672         }
673 
getTargetVariant()674         public String getTargetVariant() {
675             return target + (variant == null ? "" : "/" + variant);
676         }
677 
getSourceVariant()678         public String getSourceVariant() {
679             return source + (variant == null ? "" : "/" + variant);
680         }
681 
setDirection(Direction direction)682         protected void setDirection(Direction direction) {
683             this.direction = direction;
684         }
685 
getDirection()686         public Direction getDirection() {
687             return direction;
688         }
689 
setVariant(String variant)690         public void setVariant(String variant) {
691             this.variant = variant;
692         }
693 
getVariant()694         protected String getVariant() {
695             return variant;
696         }
697 
setTarget(String target)698         public void setTarget(String target) {
699             this.target = target;
700         }
701 
getTarget()702         public String getTarget() {
703             return target;
704         }
705 
setSource(String source)706         public void setSource(String source) {
707             this.source = source;
708         }
709 
getSource()710         public String getSource() {
711             return source;
712         }
713 
714         @Override
toString()715         public String toString() {
716             return source + "-" + getTargetVariant();
717         }
718 
getId(String source, String target, String variant)719         public static String getId(String source, String target, String variant) {
720             String id = source + '-' + target;
721             if (variant != null) id += "/" + variant;
722             return id;
723         }
724 
reverse(String id)725         public static String reverse(String id) {
726             return new ParsedTransformID().set(id).getBackwardId();
727         }
728 
setAliases(String[] aliases)729         public void setAliases(String[] aliases) {
730             this.aliases = aliases;
731         }
732 
getAliases()733         public String[] getAliases() {
734             return aliases;
735         }
736 
        /**
         * Stores the backward aliases for this transform ID.
         *
         * @param backwardAliases the alias array to record; the reference is kept, not copied
         */
        public void setBackwardAliases(String[] backwardAliases) {
            this.backwardAliases = backwardAliases;
        }
740 
getBackwardAliases()741         public String[] getBackwardAliases() {
742             return backwardAliases;
743         }
744 
setVisibility(String string)745         protected void setVisibility(String string) {
746             visibility = Visibility.valueOf(string);
747         }
748 
getVisibility()749         public Visibility getVisibility() {
750             return visibility;
751         }
752     }
753 
754     /**
755      * Verify that if the transliterator exists, it has a null filter
756      *
757      * @param id
758      */
verifyNullFilter(String id)759     public static void verifyNullFilter(String id) {
760         Transliterator widen;
761         try {
762             widen = Transliterator.getInstance(id);
763         } catch (Exception e) {
764             return;
765         }
766         UnicodeFilter filter = widen.getFilter();
767         if (filter != null) {
768             throw new IllegalArgumentException(id + " has non-empty filter: " + filter);
769         }
770     }
771 
772     public static class MyHandler extends XMLFileReader.SimpleHandler {
773         boolean first = true;
774         ParsedTransformID directionInfo;
775         String cldrFileName;
776         StringBuilder rules = new StringBuilder();
777 
getRules()778         public String getRules() {
779             return rules.toString();
780         }
781 
MyHandler(String cldrFileName, ParsedTransformID directionInfo)782         public MyHandler(String cldrFileName, ParsedTransformID directionInfo) {
783             super();
784             this.cldrFileName = cldrFileName;
785             this.directionInfo = directionInfo;
786         }
787 
788         @Override
handlePathValue(String path, String value)789         public void handlePathValue(String path, String value) {
790             if (first) {
791                 if (path.startsWith("//supplementalData/version")) {
792                     return;
793                 } else if (path.startsWith("//supplementalData/generation")) {
794                     return;
795                 }
796                 XPathParts parts = XPathParts.getFrozenInstance(path);
797                 Map<String, String> attributes = parts.findAttributes("transform");
798                 if (attributes == null) {
799                     throw new IllegalArgumentException(
800                             "Not an XML transform file: " + cldrFileName + "\t" + path);
801                 }
802                 directionInfo.setSource(attributes.get("source"));
803                 directionInfo.setTarget(attributes.get("target"));
804                 directionInfo.setVariant(attributes.get("variant"));
805                 directionInfo.setDirection(
806                         Direction.valueOf(attributes.get("direction").toLowerCase(Locale.ENGLISH)));
807 
808                 String alias = attributes.get("alias");
809                 if (alias != null) {
810                     directionInfo.setAliases(alias.trim().split("\\s+"));
811                 }
812 
813                 String backwardAlias = attributes.get("backwardAlias");
814                 if (backwardAlias != null) {
815                     directionInfo.setBackwardAliases(backwardAlias.trim().split("\\s+"));
816                 }
817 
818                 directionInfo.setVisibility(attributes.get("visibility"));
819                 first = false;
820             }
821             if (path.indexOf("/comment") >= 0) {
822                 // skip
823             } else if (path.indexOf("/tRule") >= 0) {
824                 value = fixup.transliterate(value);
825                 rules.append(value).append(CldrUtility.LINE_SEPARATOR);
826             } else {
827                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
828             }
829         }
830     }
831 
832     static boolean ALREADY_REGISTERED = false;
833     /**
834      * Register just those transliterators that are different than ICU. TODO: check against the file
835      * system to make sure the list is accurate.
836      */
registerModified()837     public void registerModified() {
838         synchronized (CLDRTransforms.class) {
839             if (ALREADY_REGISTERED) {
840                 return;
841             }
842             // NEW
843             registerTranslit("Lao-Latin", "ບ", "b");
844             registerTranslit("Khmer-Latin", "ឥ", "ĕ");
845             registerTranslit("Sinhala-Latin", "ක", "ka");
846             registerTranslit("Japn-Latn", "譆", "aa");
847 
848             // MODIFIED
849             registerTranslit("Han-SpacedHan", "《", "«");
850             registerTranslit("Greek-Latin", "΄", "´");
851             registerTranslit("Hebrew-Latin", "־", "-");
852             registerTranslit("Cyrillic-Latin", "ө", "ö");
853             registerTranslit("Myanmar-Latin", "ဿ", "s");
854             registerTranslit("Latin-Armenian", "’", "՚");
855 
856             registerTranslit("Interindic-Latin", "\uE070", ".", "\uE03C", "\u0323", "\uE04D", "");
857 
858             registerTranslit("Malayalam-Interindic", "ൺ", "");
859             registerTranslit("Interindic-Malayalam", "", "ണ്");
860             registerTranslit("Malayalam-Latin", "ൺ", "ṇ");
861 
862             registerTranslit("Devanagari-Interindic", "ॲ", "\uE084");
863             registerTranslit("Devanagari-Latin", "ॲ", "æ");
864 
865             registerTranslit("Arabic-Latin", "؉", "‰");
866             ALREADY_REGISTERED = true;
867         }
868     }
869 
    /** Empty set handed to registerTransliteratorsFromXML by {@code registerTranslit}. */
    private static final ImmutableSet<String> noSkip = ImmutableSet.of();

    /** Debug switch: when true, the dependency-ordering code prints its intermediate results. */
    private static final boolean SHOW = false;
    /**
     * Debug switch: when true, prints rule lines that TRANSLIT_FINDER fails to match, and the final
     * registration order.
     */
    private static final boolean SHOW_FAILED_MATCHES = false;
874 
875     /** Register a transliterator and verify that a sample changed value is accurate */
registerTranslit(String ID, String... sourcePairs)876     public void registerTranslit(String ID, String... sourcePairs) {
877         String internalId = registerTransliteratorsFromXML(TRANSFORM_DIR, ID, noSkip, true);
878         Transliterator.registerAny(); // do this last!
879         Transliterator t = null;
880         try {
881             t = Transliterator.getInstance(internalId);
882         } catch (Exception e) {
883             System.out.println("For " + ID + " (" + internalId + ")");
884             e.printStackTrace();
885             return;
886         }
887         testSourceTarget(t, sourcePairs);
888     }
889 
showTransliterator(String prefix, Transliterator t, int limit)890     public static void showTransliterator(String prefix, Transliterator t, int limit) {
891         showTransliterator(prefix, t, limit, System.out);
892         System.out.flush();
893     }
894 
showTransliterator( String prefix, Transliterator t, int limit, T output)895     public static <T extends Appendable> T showTransliterator(
896             String prefix, Transliterator t, int limit, T output) {
897         if (!prefix.isEmpty()) {
898             prefix += " ";
899         }
900         try {
901             output.append(prefix + "ID:\t" + t.getID() + "\n");
902             output.append(prefix + "Class:\t" + t.getClass().getName() + "\n");
903             if (t.getFilter() != null) {
904                 output.append(prefix + "Filter:\t" + t.getFilter().toPattern(false) + "\n");
905             }
906             if (t instanceof RuleBasedTransliterator) {
907                 RuleBasedTransliterator rbt = (RuleBasedTransliterator) t;
908                 String[] rules = rbt.toRules(true).split("\n");
909                 int length = rules.length;
910                 if (limit >= 0 && limit < length) length = limit;
911                 output.append(prefix + "Rules:\n");
912                 prefix += "\t";
913                 for (int i = 0; i < length; ++i) {
914                     output.append(prefix + rules[i] + "\n");
915                 }
916             } else {
917                 Transliterator[] elements = t.getElements();
918                 if (elements[0] == t) {
919                     output.append(prefix + "\tNonRuleBased\n");
920                     return output;
921                 } else {
922                     prefix += "\t";
923                     for (int i = 0; i < elements.length; ++i) {
924                         showTransliterator(prefix, elements[i], limit, output);
925                     }
926                 }
927             }
928         } catch (IOException e) {
929             throw new ICUUncheckedIOException(e);
930         }
931         return output;
932     }
933 
testSourceTarget(Transliterator t, String... sourcePairs)934     public static void testSourceTarget(Transliterator t, String... sourcePairs) {
935         for (int i = 0; i < sourcePairs.length; i += 2) {
936             String sourceTest = sourcePairs[i];
937             String targetTest = sourcePairs[i + 1];
938             String target = t.transform(sourceTest);
939             if (!target.equals(targetTest)) {
940                 throw new IllegalArgumentException(
941                         t.getID()
942                                 + " For "
943                                 + sourceTest
944                                 + ", expected "
945                                 + targetTest
946                                 + ", got "
947                                 + target);
948             }
949         }
950     }
951 
952     /**
953      * Gets a transform from a script to Latin. for testing For a locale, use
954      * ExemplarUtilities.getScript(locale) to get the script
955      */
getTestingLatinScriptTransform(final String script)956     public static Transliterator getTestingLatinScriptTransform(final String script) {
957         String id;
958 
959         switch (script) {
960             case "Latn":
961                 return null;
962             case "Khmr":
963                 id = "Khmr-Latn/UNGEGN";
964                 break;
965             case "Laoo":
966                 id = "Laoo-Latn/UNGEGN";
967                 break;
968             case "Sinh":
969                 id = "Sinh-Latn/UNGEGN";
970                 break;
971             case "Japn":
972                 id = "Jpan-Latn";
973                 break;
974             case "Kore":
975                 id = "Hangul-Latn";
976                 break;
977             case "Hant":
978             case "Hans":
979                 id = "Han-Latn";
980                 break;
981             case "Olck":
982                 id = "sat_Olck-sat_FONIPA"; // Latin IPA
983                 break;
984             case "Cher":
985                 id = "chr-chr_FONIPA";
986                 break;
987             default:
988                 id = script + "-Latn";
989         }
990         return Transliterator.getInstance(id);
991     }
992 
993     /**
994      * Returns the set of all files that can be registered, in an order that makes sure that all
995      * dependencies are handled. That is, if X uses Y in its rules, then Y has to come before X.
996      *
997      * <p>The problem is that when you build a transliterator from rules, and one of those rules is
998      * to call another transliterator X, it inserts the <b>currently</b> registered transliterator
999      * into the transliterator being built. So whenever a transliterator X is changed, you have to
1000      * reregister every transliterator that calls X. Otherwise the old version of X sticks around in
1001      * those calling transliterators. So the order that you register transliterators is important!
1002      */
getFileRegistrationOrder(String dir)1003     public static Set<String> getFileRegistrationOrder(String dir) {
1004         if (dir == null) {
1005             dir = TRANSFORM_DIR;
1006         }
1007         List<String> files = getAvailableIds();
1008         Multimap<String, String> fileToAliases = HashMultimap.create();
1009         Multimap<String, String> fileToDependencies = TreeMultimap.create();
1010         for (String file : files) {
1011             // Very simple test that depends on standard format
1012             // eg
1013             //            ::[॑ ॒ ॔ ॓ ़ ँ-ः । ॥ ॰ ०-९ ॐ ॲ ऄ-ऋ ॠ ऌ ॡ ऍ-कक़ खख़ गग़ घ-जज़ झ-डड़ ढढ़ ण-फफ़ ब-यय़
1014             // र-ह ऽ ॽ ा-ॄ ॢ ॣ ॅ-्];
1015             //            ::NFD;
1016             //            ::Devanagari-InterIndic;
1017             //            ::InterIndic-Latin;
1018             //            ::NFC;
1019             ParsedTransformID directionInfo = new ParsedTransformID();
1020             String ruleString = getIcuRulesFromXmlFile(dir, file, directionInfo);
1021             Set<String> others = new LinkedHashSet<>();
1022             Set<String> order =
1023                     ruleString
1024                             .lines()
1025                             .map(x -> x.trim())
1026                             .filter(x -> x.contains("::") && !x.trim().startsWith("#"))
1027                             .map(x -> parseDoubleColon(x, others))
1028                             .collect(Collectors.toCollection(LinkedHashSet::new));
1029             order.addAll(others);
1030             if (SHOW) {
1031                 System.out.println(file + "=>" + order);
1032             }
1033             if (!order.isEmpty()) {
1034                 fileToDependencies.putAll(file, order);
1035             }
1036             if (directionInfo.direction != Direction.backward) { // that is, forward or both
1037                 fileToAliases.put(file, directionInfo.getId());
1038                 fileToAliases.putAll(file, Arrays.asList(directionInfo.getAliases()));
1039                 if (SHOW) {
1040                     System.out.println(
1041                             "\t"
1042                                     + directionInfo.getId()
1043                                     + "\t"
1044                                     + Arrays.asList(directionInfo.getAliases()));
1045                 }
1046             }
1047             if (directionInfo.direction != Direction.forward) { // that is, backward or both
1048                 fileToAliases.put(file, directionInfo.getBackwardId());
1049                 fileToAliases.putAll(file, Arrays.asList(directionInfo.getBackwardAliases()));
1050                 if (SHOW) {
1051                     System.out.println(
1052                             "\t"
1053                                     + directionInfo.getBackwardId()
1054                                     + "\t"
1055                                     + Arrays.asList(directionInfo.getBackwardAliases()));
1056                 }
1057             }
1058         }
1059         TreeMultimap<String, String> aliasesToFile =
1060                 Multimaps.invertFrom(fileToAliases, TreeMultimap.create());
1061         Multimap<String, String> fileToDependentFiles = TreeMultimap.create();
1062 
1063         for (Entry<String, Collection<String>> entry : fileToDependencies.asMap().entrySet()) {
1064             Set<String> v =
1065                     entry.getValue().stream()
1066                             .filter(x -> aliasesToFile.containsKey(x))
1067                             .map(y -> aliasesToFile.get(y).first())
1068                             .collect(Collectors.toSet());
1069             fileToDependentFiles.putAll(entry.getKey(), v);
1070         }
1071         Builder<String> comp = new DiscreteComparator.Builder<>(null);
1072         fileToDependentFiles.forEach(
1073                 (x, y) -> {
1074                     if (SHOW) {
1075                         System.out.println(x + "=" + y);
1076                     }
1077                     comp.add(y, x); // put dependent earlier
1078                 });
1079         // .add("c", "d", "b", "a").add("m", "n", "d").get();
1080 
1081         DiscreteComparator<String> comp2 = comp.get();
1082         Set<String> orderedDependents = new LinkedHashSet<>(comp2.getOrdering());
1083         orderedDependents.retainAll(
1084                 fileToDependentFiles.values()); // remove files that are not dependents
1085         Set<String> remainingFiles = new TreeSet<>(files);
1086         remainingFiles.removeAll(orderedDependents);
1087         orderedDependents.addAll(remainingFiles);
1088         if (SHOW_FAILED_MATCHES) {
1089             System.out.println(orderedDependents);
1090         }
1091         return ImmutableSet.copyOf(orderedDependents);
1092     }
1093     // fails match: :: [:Latin:] fullwidth-halfwidth ();
1094 
1095     static final Pattern TRANSLIT_FINDER =
1096             Pattern.compile(
1097                     "\\s*::\\s*"
1098                             + "(?:\\[[^\\]]+\\]\\s*)?"
1099                             + "([A-Za-z0-9////_//-]*)?"
1100                             + "(?:"
1101                             + "\\s*\\("
1102                             + "(?:\\[[^\\]]+\\]\\s*)?"
1103                             + "([A-Za-z0-9////_//-]*)?"
1104                             + "\\s*\\)"
1105                             + ")?"
1106                             + "\\s*;\\s*(#.*)?");
1107     //    static {
1108     //        Matcher matcher = TRANSLIT_FINDER.matcher("::[:Latin:] fullwidth-halfwidth();");
1109     //        System.out.println(matcher.matches());
1110     //    }
1111 
parseDoubleColon(String x, Set<String> others)1112     static String parseDoubleColon(String x, Set<String> others) {
1113         Matcher matcher = TRANSLIT_FINDER.matcher(x);
1114         if (matcher.matches()) {
1115             String first = matcher.group(1);
1116             String second = matcher.group(2);
1117             if (SHOW) {
1118                 System.out.println("1: " + first + "\t2:" + second);
1119             }
1120             if (second != null && !second.isBlank()) {
1121                 others.add(second);
1122             }
1123             return first == null || first.isBlank() ? "" : first;
1124         } else {
1125             if (SHOW_FAILED_MATCHES) {
1126                 System.out.println("fails match: " + x);
1127             }
1128         }
1129         return "";
1130     }
1131 
1132     public class CLDRTransformsJsonIndex {
1133         /** raw list of available IDs */
1134         public String[] available =
1135                 getAvailableIds().stream()
1136                         .map((String id) -> id.replace(".xml", ""))
1137                         .sorted()
1138                         .collect(Collectors.toList())
1139                         .toArray(new String[0]);
1140     }
1141 
1142     /** This gets the metadata (index file) exposed as cldr-json/cldr-transforms/transforms.json */
getJsonIndex()1143     public CLDRTransformsJsonIndex getJsonIndex() {
1144         final CLDRTransformsJsonIndex index = new CLDRTransformsJsonIndex();
1145         return index;
1146     }
1147 }
1148