• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.HashSet;
7 import java.util.LinkedHashMap;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.TreeSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 
16 import org.unicode.cldr.draft.FileUtilities;
17 import org.unicode.cldr.test.DisplayAndInputProcessor;
18 import org.unicode.cldr.tool.Option.Options;
19 import org.unicode.cldr.tool.Option.Params;
20 import org.unicode.cldr.util.Annotations;
21 import org.unicode.cldr.util.Annotations.AnnotationSet;
22 import org.unicode.cldr.util.CLDRConfig;
23 import org.unicode.cldr.util.CLDRFile;
24 import org.unicode.cldr.util.CLDRPaths;
25 import org.unicode.cldr.util.CldrUtility;
26 import org.unicode.cldr.util.Emoji;
27 import org.unicode.cldr.util.Factory;
28 import org.unicode.cldr.util.Level;
29 import org.unicode.cldr.util.Organization;
30 import org.unicode.cldr.util.SimpleXMLSource;
31 import org.unicode.cldr.util.StandardCodes;
32 import org.unicode.cldr.util.XPathParts.Comments.CommentType;
33 
34 import com.google.common.base.Joiner;
35 import com.google.common.base.Splitter;
36 import com.google.common.collect.ImmutableSortedSet;
37 import com.ibm.icu.impl.Utility;
38 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
39 import com.ibm.icu.text.UnicodeSet;
40 
41 public class GenerateDerivedAnnotations {
42     // Use EmojiData.getDerivableNames() to update this for each version of Unicode.
43 
44     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
45 
46     static final UnicodeSet SKIP = new UnicodeSet()
47         .add(Annotations.ENGLISH_MARKER)
48         .add(Annotations.BAD_MARKER)
49         .add(Annotations.MISSING_MARKER)
50         .freeze();
51 
52     static Map<String,String> codepointToIsoCurrencyCode;
53     static {
54         final Splitter tabSplitter = Splitter.on('\t').trimResults();
55         Map<String,String> _codepointToIsoCurrencyCode = new TreeMap<>();
56         for (String line : FileUtilities.in(CldrUtility.class, "data/codepointToIsoCurrencyCode.tsv")) {
57             if (line.startsWith("#")) {
58                 continue;
59             }
60             List<String> parts = tabSplitter.splitToList(line);
61             _codepointToIsoCurrencyCode.put(parts.get(0), parts.get(1));
62         }
63         codepointToIsoCurrencyCode = ImmutableMap.copyOf(_codepointToIsoCurrencyCode);
64     }
65 
66     private enum MyOptions {
67         fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*").setDefault(".*")),
68         missing(new Params().setHelp("only missing").setMatch("")),
69         ;
70 
71         // BOILERPLATE TO COPY
72         final Option option;
73 
MyOptions(Params params)74         private MyOptions(Params params) {
75             option = new Option(this, params);
76         }
77 
78         private static Options myOptions = new Options();
79         static {
80             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)81                 myOptions.add(option, option.option);
82             }
83         }
84 
parse(String[] args)85         private static Set<String> parse(String[] args) {
86             return myOptions.parse(MyOptions.values()[0], args, true);
87         }
88     }
89 
main(String[] args)90     public static void main(String[] args) throws IOException {
91         MyOptions.parse(args);
92 
93         boolean missingOnly = MyOptions.missing.option.doesOccur();
94         if (missingOnly) {
95             System.out.println("With the 'missing' argument files will not be written, only the missing items will be written to the console");
96         }
97 
98         Matcher localeMatcher = Pattern.compile(MyOptions.fileFilter.option.getValue()).matcher("");
99         Joiner BAR = Joiner.on(" | ");
100         AnnotationSet enAnnotations = Annotations.getDataSet("en");
101         CLDRFile english = CLDR_CONFIG.getEnglish();
102 
103         UnicodeSet derivables = new UnicodeSet(Emoji.getAllRgiNoES())
104             .addAll(codepointToIsoCurrencyCode.keySet())
105             .removeAll(enAnnotations.keySet())
106             .freeze();
107 
108         for (String d : derivables) {
109             if (d.contains("����")) {
110                 System.out.println(d + "\t" + Utility.hex(d));
111             }
112         }
113 
114         Map<String, UnicodeSet> localeToFailures = new LinkedHashMap<>();
115         Set<String> locales = ImmutableSortedSet.copyOf(Annotations.getAvailable());
116         final Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();
117         final Map<String, Integer> failureMap = new TreeMap<>();
118         int processCount = 0;
119 
120         for (String locale : locales) {
121             if ("root".equals(locale)) {
122                 continue;
123             }
124             if (!localeMatcher.reset(locale).matches()) {
125                 continue;
126             }
127             processCount++;
128             UnicodeSet failures = new UnicodeSet(Emoji.getAllRgiNoES());
129             localeToFailures.put(locale, failures);
130 
131             AnnotationSet annotations;
132             try {
133                 annotations = Annotations.getDataSet(locale);
134                 failures.removeAll(annotations.getExplicitValues());
135             } catch (Exception e) {
136                 System.out.println("Can't create annotations for: " + locale + "\n\t" + e.getMessage());
137                 annotations = Annotations.getDataSet(locale);
138                 continue;
139             }
140             CLDRFile target = new CLDRFile(new SimpleXMLSource(locale));
141             CLDRFile main = null;
142             DisplayAndInputProcessor DAIP = new DisplayAndInputProcessor(target);
143             Exception[] internalException = new Exception[1];
144 
145             target.addComment("//ldml", "Derived short names and annotations, using GenerateDerivedAnnotations.java. See warnings in /annotations/ file.",
146                 CommentType.PREBLOCK);
147             for (String derivable : derivables) {
148                 String shortName = null;
149                 try {
150                     shortName = annotations.getShortName(derivable);
151                 } catch (Exception e) {
152                 }
153 
154                 if (shortName == null) {
155                     String currencyCode = codepointToIsoCurrencyCode.get(derivable);
156                     if (currencyCode != null) {
157                         if (main == null) {
158                             main = cldrFactory.make(locale, true);
159                         }
160                         shortName = main.getName(CLDRFile.CURRENCY_NAME, currencyCode);
161                         if (shortName.contentEquals(currencyCode)) {
162                             shortName = null; // don't want fallback raw code
163                         }
164                     }
165                 }
166 
167                 if (shortName == null || SKIP.containsSome(shortName)) {
168                     continue; // missing
169                 }
170                 Set<String> keywords = annotations.getKeywordsMinus(derivable);
171                 String path = "//ldml/annotations/annotation[@cp=\"" + derivable + "\"]";
172                 if (!keywords.isEmpty()) {
173                     Set<String> keywordsFixed = new HashSet<>();
174                     for (String keyword : keywords) {
175                         if (!SKIP.containsSome(keyword)) {
176                             keywordsFixed.add(keyword);
177                         }
178                     }
179                     if (!keywordsFixed.isEmpty()) {
180                         String value = BAR.join(keywordsFixed);
181                         String newValue = DAIP.processInput(path, value, internalException);
182                         target.add(path, newValue);
183                     }
184                 }
185                 failures.remove(derivable);
186                 String ttsPath = path + "[@type=\"tts\"]";
187                 String shortName2 = DAIP.processInput(path, shortName, internalException);
188                 target.add(ttsPath, shortName2);
189             }
190             failures.freeze();
191             if (!failures.isEmpty()) {
192                 Level level = StandardCodes.make().getLocaleCoverageLevel(Organization.cldr, locale);
193                 System.out.println("Failures\t" + locale
194                     + "\t" + level
195                     + "\t" + english.getName(locale)
196                     + "\t" + failures.size()
197                     + "\t" + failures.toPattern(false));
198                 failureMap.put(locale, failures.size());
199             }
200             if (missingOnly) {
201                 continue;
202             }
203             try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml")) {
204                 target.write(pw);
205             }
206         }
207         Factory factory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", ".*");
208         for (String locale : locales) {
209             if ("root".equals(locale)) {
210                 continue;
211             }
212             if (!localeMatcher.reset(locale).matches()) {
213                 continue;
214             }
215             CLDRFile cldrFileUnresolved = factory.make(locale, false);
216             CLDRFile cldrFileResolved = factory.make(locale, true);
217             Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier
218             boolean gotOne = false;
219             for (String xpath : cldrFileUnresolved) {
220                 if (xpath.startsWith("//ldml/identity")) {
221                     continue;
222                 }
223 
224                 String value = cldrFileUnresolved.getStringValue(xpath);
225 
226                 // remove items that are the same as their bailey values. This also catches Inheritance Marker
227 
228                 String bailey = cldrFileResolved.getBaileyValue(xpath, null, null);
229                 if (value.equals(bailey)) {
230                     toRemove.add(xpath);
231                     continue;
232                 }
233                 gotOne = true;
234             }
235             if (!gotOne) {
236                 if (locale.equals("sr_Cyrl")) {
237                     System.err.println("TODO: keep from deleting files with non-empty children");
238                 } else {
239                     System.out.println("Removing empty " + locale);
240                     new File(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml").deleteOnExit();
241                 }
242             } else if (!toRemove.isEmpty()) {
243                 System.out.println("Removing " + toRemove.size() + " items from " + locale);
244                 CLDRFile fileToWrite = cldrFileUnresolved.cloneAsThawed();
245                 fileToWrite.removeAll(toRemove, false);
246                 File file = new File(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml");
247                 try (PrintWriter pw = new PrintWriter(file)) {
248                     fileToWrite.write(pw);
249                 }
250             }
251         }
252         System.out.println("Be sure to run CLDRModify passes afterwards, and generate transformed locales (like de-CH).");
253         if (!failureMap.isEmpty()) {
254             failureMap.entrySet().forEach(e -> System.err.printf("ERROR: %s: %d errors\n", e.getKey(), e.getValue()));
255             System.err.printf("ERROR: Errors in %d/%d locales.\n", failureMap.size(), processCount);
256             System.exit(1);
257         } else if(processCount == 0) {
258             System.err.println("ERROR: No locales matched. Check the -f option.\n");
259             System.exit(1);
260         } else {
261             System.out.printf("OK: %d locales processed without error\n", processCount);
262             System.exit(0);
263         }
264     }
265 }
266