• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.ImmutableSortedSet;
6 import com.ibm.icu.impl.Utility;
7 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
8 import com.ibm.icu.text.UnicodeSet;
9 import java.io.File;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.util.HashSet;
13 import java.util.LinkedHashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.DisplayAndInputProcessor;
23 import org.unicode.cldr.tool.Option.Options;
24 import org.unicode.cldr.tool.Option.Params;
25 import org.unicode.cldr.util.Annotations;
26 import org.unicode.cldr.util.Annotations.AnnotationSet;
27 import org.unicode.cldr.util.CLDRConfig;
28 import org.unicode.cldr.util.CLDRFile;
29 import org.unicode.cldr.util.CLDRPaths;
30 import org.unicode.cldr.util.CldrUtility;
31 import org.unicode.cldr.util.Emoji;
32 import org.unicode.cldr.util.Factory;
33 import org.unicode.cldr.util.Level;
34 import org.unicode.cldr.util.LocaleNames;
35 import org.unicode.cldr.util.Organization;
36 import org.unicode.cldr.util.SimpleXMLSource;
37 import org.unicode.cldr.util.StandardCodes;
38 import org.unicode.cldr.util.XPathParts.Comments.CommentType;
39 
40 public class GenerateDerivedAnnotations {
41     // The guts for derived names is in Annotations.synthesize
42     // Run CLDRModify afterwards: see
43     // https://docs.google.com/document/d/1h4xeKyEwCFnFvfN_szExBJcHaHQ8-tgoJTNMPQEpg8Q/edit for
44     // example
45     // Then run
46     // https://sites.google.com/site/cldr/internal-development/cldr-development-site/generate-algorithmic-locales
47 
48     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
49 
50     static final UnicodeSet SKIP =
51             new UnicodeSet()
52                     .add(Annotations.ENGLISH_MARKER)
53                     .add(Annotations.BAD_MARKER)
54                     .add(Annotations.MISSING_MARKER)
55                     .freeze();
56 
57     static Map<String, String> codepointToIsoCurrencyCode;
58 
59     static {
60         final Splitter tabSplitter = Splitter.on('\t').trimResults();
61         Map<String, String> _codepointToIsoCurrencyCode = new TreeMap<>();
62         for (String line :
63                 FileUtilities.in(CldrUtility.class, "data/codepointToIsoCurrencyCode.tsv")) {
64             if (line.startsWith("#")) {
65                 continue;
66             }
67             List<String> parts = tabSplitter.splitToList(line);
68             _codepointToIsoCurrencyCode.put(parts.get(0), parts.get(1));
69         }
70         codepointToIsoCurrencyCode = ImmutableMap.copyOf(_codepointToIsoCurrencyCode);
71     }
72 
73     private enum MyOptions {
74         fileFilter(
75                 new Params()
76                         .setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en")
77                         .setMatch(".*")
78                         .setDefault(".*")),
79         missing(new Params().setHelp("only missing").setMatch("")),
80         ;
81 
82         // BOILERPLATE TO COPY
83         final Option option;
84 
MyOptions(Params params)85         private MyOptions(Params params) {
86             option = new Option(this, params);
87         }
88 
89         private static Options myOptions = new Options();
90 
91         static {
92             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)93                 myOptions.add(option, option.option);
94             }
95         }
96 
parse(String[] args)97         private static Set<String> parse(String[] args) {
98             return myOptions.parse(MyOptions.values()[0], args, true);
99         }
100     }
101 
main(String[] args)102     public static void main(String[] args) throws IOException {
103         MyOptions.parse(args);
104 
105         boolean missingOnly = MyOptions.missing.option.doesOccur();
106         if (missingOnly) {
107             System.out.println(
108                     "With the 'missing' argument files will not be written, only the missing items will be written to the console");
109         }
110 
111         Matcher localeMatcher = Pattern.compile(MyOptions.fileFilter.option.getValue()).matcher("");
112         Joiner BAR = Joiner.on(" | ");
113         AnnotationSet enAnnotations = Annotations.getDataSet("en");
114         CLDRFile english = CLDR_CONFIG.getEnglish();
115 
116         UnicodeSet derivables =
117                 new UnicodeSet(Emoji.getAllRgiNoES())
118                         .addAll(codepointToIsoCurrencyCode.keySet())
119                         .removeAll(enAnnotations.keySet())
120                         .freeze();
121 
122         for (String d : derivables) {
123             if (d.contains("����")) {
124                 System.out.println(d + "\t" + Utility.hex(d));
125             }
126         }
127 
128         Map<String, UnicodeSet> localeToFailures = new LinkedHashMap<>();
129         Set<String> locales = ImmutableSortedSet.copyOf(Annotations.getAvailable());
130         final Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();
131         final Map<String, Integer> failureMap = new TreeMap<>();
132         int processCount = 0;
133 
134         for (String locale : locales) {
135             if (LocaleNames.ROOT.equals(locale)) {
136                 continue;
137             }
138             if (!localeMatcher.reset(locale).matches()) {
139                 continue;
140             }
141             processCount++;
142             UnicodeSet failures = new UnicodeSet(Emoji.getAllRgiNoES());
143             localeToFailures.put(locale, failures);
144 
145             AnnotationSet annotations;
146             try {
147                 annotations = Annotations.getDataSet(locale);
148                 failures.removeAll(annotations.getExplicitValues());
149             } catch (Exception e) {
150                 System.out.println(
151                         "Can't create annotations for: " + locale + "\n\t" + e.getMessage());
152                 annotations = Annotations.getDataSet(locale);
153                 continue;
154             }
155             CLDRFile target = new CLDRFile(new SimpleXMLSource(locale));
156             CLDRFile main = null;
157             DisplayAndInputProcessor DAIP = new DisplayAndInputProcessor(target);
158             Exception[] internalException = new Exception[1];
159 
160             target.addComment(
161                     "//ldml",
162                     "Derived short names and annotations, using GenerateDerivedAnnotations.java. See warnings in /annotations/ file.",
163                     CommentType.PREBLOCK);
164             for (String derivable : derivables) {
165                 String shortName = null;
166                 try {
167                     shortName = annotations.getShortName(derivable);
168                 } catch (Exception e) {
169                 }
170 
171                 if (shortName == null) {
172                     String currencyCode = codepointToIsoCurrencyCode.get(derivable);
173                     if (currencyCode != null) {
174                         if (main == null) {
175                             main = cldrFactory.make(locale, true);
176                         }
177                         shortName = main.getName(CLDRFile.CURRENCY_NAME, currencyCode);
178                         if (shortName.contentEquals(currencyCode)) {
179                             shortName = null; // don't want fallback raw code
180                         }
181                     }
182                 }
183 
184                 if (shortName == null || SKIP.containsSome(shortName)) {
185                     continue; // missing
186                 }
187                 Set<String> keywords = annotations.getKeywordsMinus(derivable);
188                 String path = "//ldml/annotations/annotation[@cp=\"" + derivable + "\"]";
189                 if (!keywords.isEmpty()) {
190                     Set<String> keywordsFixed = new HashSet<>();
191                     for (String keyword : keywords) {
192                         if (!SKIP.containsSome(keyword)) {
193                             keywordsFixed.add(keyword);
194                         }
195                     }
196                     if (!keywordsFixed.isEmpty()) {
197                         String value = BAR.join(keywordsFixed);
198                         String newValue = DAIP.processInput(path, value, internalException);
199                         target.add(path, newValue);
200                     }
201                 }
202                 failures.remove(derivable);
203                 String ttsPath = path + "[@type=\"tts\"]";
204                 String shortName2 = DAIP.processInput(path, shortName, internalException);
205                 target.add(ttsPath, shortName2);
206             }
207             failures.freeze();
208             if (!failures.isEmpty()) {
209                 Level level =
210                         StandardCodes.make().getLocaleCoverageLevel(Organization.cldr, locale);
211                 System.out.println(
212                         "Failures\t"
213                                 + locale
214                                 + "\t"
215                                 + level
216                                 + "\t"
217                                 + english.getName(locale)
218                                 + "\t"
219                                 + failures.size()
220                                 + "\t"
221                                 + failures.toPattern(false));
222                 failureMap.put(locale, failures.size());
223             }
224             if (missingOnly) {
225                 continue;
226             }
227             try (PrintWriter pw =
228                     FileUtilities.openUTF8Writer(
229                             CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml")) {
230                 target.write(pw);
231             }
232         }
233         Factory factory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", ".*");
234         for (String locale : locales) {
235             if (LocaleNames.ROOT.equals(locale)) {
236                 continue;
237             }
238             if (!localeMatcher.reset(locale).matches()) {
239                 continue;
240             }
241             CLDRFile cldrFileUnresolved = factory.make(locale, false);
242             CLDRFile cldrFileResolved = factory.make(locale, true);
243             Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier
244             boolean gotOne = false;
245             for (String xpath : cldrFileUnresolved) {
246                 if (xpath.startsWith("//ldml/identity")) {
247                     continue;
248                 }
249 
250                 String value = cldrFileUnresolved.getStringValue(xpath);
251 
252                 // remove items that are the same as their bailey values. This also catches
253                 // Inheritance Marker
254 
255                 String bailey = cldrFileResolved.getBaileyValue(xpath, null, null);
256                 if (value.equals(bailey)) {
257                     toRemove.add(xpath);
258                     continue;
259                 }
260                 gotOne = true;
261             }
262             if (!gotOne) {
263                 if (locale.equals("sr_Cyrl")) {
264                     System.err.println("TODO: keep from deleting files with non-empty children");
265                 } else {
266                     System.out.println("Removing empty " + locale);
267                     new File(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml")
268                             .deleteOnExit();
269                 }
270             } else if (!toRemove.isEmpty()) {
271                 System.out.println("Removing " + toRemove.size() + " items from " + locale);
272                 CLDRFile fileToWrite = cldrFileUnresolved.cloneAsThawed();
273                 fileToWrite.removeAll(toRemove, false);
274                 File file =
275                         new File(
276                                 CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml");
277                 try (PrintWriter pw = new PrintWriter(file)) {
278                     fileToWrite.write(pw);
279                 }
280             }
281         }
282         System.out.println(
283                 "Be sure to run CLDRModify passes afterwards, and generate transformed locales (like de-CH).");
284         if (!failureMap.isEmpty()) {
285             failureMap
286                     .entrySet()
287                     .forEach(
288                             e ->
289                                     System.err.printf(
290                                             "ERROR: %s: %d errors\n", e.getKey(), e.getValue()));
291             System.err.printf("ERROR: Errors in %d/%d locales.\n", failureMap.size(), processCount);
292             System.exit(1);
293         } else if (processCount == 0) {
294             System.err.println("ERROR: No locales matched. Check the -f option.\n");
295             System.exit(1);
296         } else {
297             System.out.printf("OK: %d locales processed without error\n", processCount);
298             System.exit(0);
299         }
300     }
301 }
302