• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.ImmutableSortedSet;
6 import com.ibm.icu.impl.Utility;
7 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
8 import com.ibm.icu.text.UnicodeSet;
9 import java.io.IOException;
10 import java.util.HashSet;
11 import java.util.LinkedHashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 import org.unicode.cldr.draft.FileUtilities;
20 import org.unicode.cldr.test.DisplayAndInputProcessor;
21 import org.unicode.cldr.tool.Option.Options;
22 import org.unicode.cldr.tool.Option.Params;
23 import org.unicode.cldr.util.Annotations;
24 import org.unicode.cldr.util.Annotations.AnnotationSet;
25 import org.unicode.cldr.util.CLDRConfig;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRPaths;
28 import org.unicode.cldr.util.CLDRTreeWriter;
29 import org.unicode.cldr.util.CldrUtility;
30 import org.unicode.cldr.util.Emoji;
31 import org.unicode.cldr.util.Factory;
32 import org.unicode.cldr.util.Level;
33 import org.unicode.cldr.util.LocaleNames;
34 import org.unicode.cldr.util.Organization;
35 import org.unicode.cldr.util.SimpleXMLSource;
36 import org.unicode.cldr.util.StandardCodes;
37 import org.unicode.cldr.util.XPathParts.Comments.CommentType;
38 
39 public class GenerateDerivedAnnotations {
40     // The guts for derived names is in Annotations.synthesize
41     // Run CLDRModify afterwards: see
42     // https://docs.google.com/document/d/1h4xeKyEwCFnFvfN_szExBJcHaHQ8-tgoJTNMPQEpg8Q/edit for
43     // example
44     // Then run
45     // https://sites.google.com/site/cldr/internal-development/cldr-development-site/generate-algorithmic-locales
46 
47     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
48 
49     static final UnicodeSet SKIP =
50             new UnicodeSet()
51                     .add(Annotations.ENGLISH_MARKER)
52                     .add(Annotations.BAD_MARKER)
53                     .add(Annotations.MISSING_MARKER)
54                     .freeze();
55 
56     static Map<String, String> codepointToIsoCurrencyCode;
57 
58     static {
59         final Splitter tabSplitter = Splitter.on('\t').trimResults();
60         Map<String, String> _codepointToIsoCurrencyCode = new TreeMap<>();
61         for (String line :
62                 FileUtilities.in(CldrUtility.class, "data/codepointToIsoCurrencyCode.tsv")) {
63             if (line.startsWith("#")) {
64                 continue;
65             }
66             List<String> parts = tabSplitter.splitToList(line);
67             _codepointToIsoCurrencyCode.put(parts.get(0), parts.get(1));
68         }
69         codepointToIsoCurrencyCode = ImmutableMap.copyOf(_codepointToIsoCurrencyCode);
70     }
71 
72     private enum MyOptions {
73         fileFilter(
74                 new Params()
75                         .setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en")
76                         .setMatch(".*")
77                         .setDefault(".*")),
78         missing(new Params().setHelp("only missing").setMatch("")),
79         ;
80 
81         // BOILERPLATE TO COPY
82         final Option option;
83 
MyOptions(Params params)84         private MyOptions(Params params) {
85             option = new Option(this, params);
86         }
87 
88         private static Options myOptions = new Options();
89 
90         static {
91             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)92                 myOptions.add(option, option.option);
93             }
94         }
95 
parse(String[] args)96         private static Set<String> parse(String[] args) {
97             return myOptions.parse(MyOptions.values()[0], args, true);
98         }
99     }
100 
main(String[] args)101     public static void main(String[] args) throws IOException {
102         MyOptions.parse(args);
103 
104         boolean missingOnly = MyOptions.missing.option.doesOccur();
105         if (missingOnly) {
106             System.out.println(
107                     "With the 'missing' argument files will not be written, only the missing items will be written to the console");
108         }
109 
110         Matcher localeMatcher = Pattern.compile(MyOptions.fileFilter.option.getValue()).matcher("");
111         Joiner BAR = Joiner.on(" | ");
112         AnnotationSet enAnnotations = Annotations.getDataSet("en");
113         CLDRFile english = CLDR_CONFIG.getEnglish();
114 
115         UnicodeSet derivables =
116                 new UnicodeSet(Emoji.getAllRgiNoES())
117                         .addAll(codepointToIsoCurrencyCode.keySet())
118                         .removeAll(enAnnotations.keySet())
119                         .freeze();
120 
121         for (String d : derivables) {
122             if (d.contains("����")) {
123                 System.out.println(d + "\t" + Utility.hex(d));
124             }
125         }
126 
127         Map<String, UnicodeSet> localeToFailures = new LinkedHashMap<>();
128         Set<String> locales = ImmutableSortedSet.copyOf(Annotations.getAvailable());
129         final Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();
130         final Map<String, Integer> failureMap = new TreeMap<>();
131         int processCount = 0;
132 
133         CLDRTreeWriter treeWriter =
134                 new CLDRTreeWriter(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived");
135 
136         for (String locale : locales) {
137             if (LocaleNames.ROOT.equals(locale)) {
138                 continue;
139             }
140             if (!localeMatcher.reset(locale).matches()) {
141                 continue;
142             }
143             processCount++;
144             UnicodeSet failures = new UnicodeSet(Emoji.getAllRgiNoES());
145             localeToFailures.put(locale, failures);
146 
147             AnnotationSet annotations;
148             try {
149                 annotations = Annotations.getDataSet(locale);
150                 failures.removeAll(annotations.getExplicitValues());
151             } catch (Exception e) {
152                 System.out.println(
153                         "Can't create annotations for: " + locale + "\n\t" + e.getMessage());
154                 annotations = Annotations.getDataSet(locale);
155                 continue;
156             }
157             CLDRFile target = new CLDRFile(new SimpleXMLSource(locale));
158             CLDRFile main = null;
159             DisplayAndInputProcessor DAIP = new DisplayAndInputProcessor(target);
160             Exception[] internalException = new Exception[1];
161 
162             target.addComment(
163                     "//ldml",
164                     "Derived short names and annotations, using GenerateDerivedAnnotations.java. See warnings in /annotations/ file.",
165                     CommentType.PREBLOCK);
166             for (String derivable : derivables) {
167                 String shortName = null;
168                 try {
169                     shortName = annotations.getShortName(derivable);
170                 } catch (Exception e) {
171                 }
172 
173                 if (shortName == null) {
174                     String currencyCode = codepointToIsoCurrencyCode.get(derivable);
175                     if (currencyCode != null) {
176                         if (main == null) {
177                             main = cldrFactory.make(locale, true);
178                         }
179                         shortName = main.getName(CLDRFile.CURRENCY_NAME, currencyCode);
180                         if (shortName.contentEquals(currencyCode)) {
181                             shortName = null; // don't want fallback raw code
182                         }
183                     }
184                 }
185 
186                 if (shortName == null || SKIP.containsSome(shortName)) {
187                     continue; // missing
188                 }
189                 Set<String> keywords = annotations.getKeywordsMinus(derivable);
190                 String path = "//ldml/annotations/annotation[@cp=\"" + derivable + "\"]";
191                 if (!keywords.isEmpty()) {
192                     Set<String> keywordsFixed = new HashSet<>();
193                     for (String keyword : keywords) {
194                         if (!SKIP.containsSome(keyword)) {
195                             keywordsFixed.add(keyword);
196                         }
197                     }
198                     if (!keywordsFixed.isEmpty()) {
199                         String value = BAR.join(keywordsFixed);
200                         String newValue = DAIP.processInput(path, value, internalException);
201                         target.add(path, newValue);
202                     }
203                 }
204                 failures.remove(derivable);
205                 String ttsPath = path + "[@type=\"tts\"]";
206                 String shortName2 = DAIP.processInput(path, shortName, internalException);
207                 target.add(ttsPath, shortName2);
208             }
209             failures.freeze();
210             if (!failures.isEmpty()) {
211                 Level level =
212                         StandardCodes.make().getLocaleCoverageLevel(Organization.cldr, locale);
213                 System.out.println(
214                         "Failures\t"
215                                 + locale
216                                 + "\t"
217                                 + level
218                                 + "\t"
219                                 + english.getName(locale)
220                                 + "\t"
221                                 + failures.size()
222                                 + "\t"
223                                 + failures.toPattern(false));
224                 failureMap.put(locale, failures.size());
225             }
226             if (missingOnly) {
227                 continue;
228             }
229 
230             treeWriter.write(target);
231         }
232         Factory factory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", ".*");
233         for (String locale : locales) {
234             if (LocaleNames.ROOT.equals(locale)) {
235                 continue;
236             }
237             if (!localeMatcher.reset(locale).matches()) {
238                 continue;
239             }
240             CLDRFile cldrFileUnresolved = factory.make(locale, false);
241             CLDRFile cldrFileResolved = factory.make(locale, true);
242             Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier
243             boolean gotOne = false;
244             for (String xpath : cldrFileUnresolved) {
245                 if (xpath.startsWith("//ldml/identity")) {
246                     continue;
247                 }
248 
249                 String value = cldrFileUnresolved.getStringValue(xpath);
250 
251                 // remove items that are the same as their bailey values. This also catches
252                 // Inheritance Marker
253 
254                 String bailey = cldrFileResolved.getBaileyValue(xpath, null, null);
255                 if (value.equals(bailey)) {
256                     toRemove.add(xpath);
257                     continue;
258                 }
259                 gotOne = true;
260             }
261             if (!gotOne) {
262                 if (locale.equals("sr_Cyrl")) {
263                     System.err.println("TODO: keep from deleting files with non-empty children");
264                 } else {
265                     System.out.println("Removing empty " + locale);
266                     treeWriter.delete(locale);
267                 }
268             } else if (!toRemove.isEmpty()) {
269                 System.out.println("Removing " + toRemove.size() + " items from " + locale);
270                 CLDRFile fileToWrite = cldrFileUnresolved.cloneAsThawed();
271                 fileToWrite.removeAll(toRemove, false);
272                 treeWriter.write(fileToWrite);
273             }
274         }
275         treeWriter.close();
276         System.out.println(
277                 "Be sure to run CLDRModify passes afterwards, and generate transformed locales (like de-CH).");
278         if (!failureMap.isEmpty()) {
279             failureMap
280                     .entrySet()
281                     .forEach(
282                             e ->
283                                     System.err.printf(
284                                             "ERROR: %s: %d errors\n", e.getKey(), e.getValue()));
285             System.err.printf("ERROR: Errors in %d/%d locales.\n", failureMap.size(), processCount);
286             System.exit(1);
287         } else if (processCount == 0) {
288             System.err.println("ERROR: No locales matched. Check the -f option.\n");
289             System.exit(1);
290         } else {
291             System.out.printf("OK: %d locales processed without error\n", processCount);
292             System.exit(0);
293         }
294     }
295 }
296