• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package main.java.org.unicode.icu.tool.cldrtoicu.generator;
2 
3 import com.google.common.base.Splitter;
4 import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator;
5 import org.unicode.cldr.api.*;
6 
7 import java.io.PrintWriter;
8 import java.nio.file.Path;
9 import java.util.*;
10 
11 import static com.google.common.base.CharMatcher.whitespace;
12 
13 public class ResourceFallbackCodeGenerator implements CodeGenerator {
14     private Map<String, String> defaultScripts;
15     private Map<String, String> parentLocales;
16     private Splitter localeIDSplitter;
17     private Splitter childLocaleSplitter;
18 
19     @Override
generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut)20     public void generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut) {
21         defaultScripts = new TreeMap<String, String>();
22         parentLocales = new TreeMap<String, String>();
23         localeIDSplitter = Splitter.on('_');
24         childLocaleSplitter = Splitter.on(whitespace()).omitEmptyStrings();
25 
26         CldrDataSupplier supplier = CldrDataSupplier.forCldrFilesIn(cldrPath);
27         CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL);
28         supplementalData.accept(CldrData.PathOrder.NESTED_GROUPING, new CldrData.PrefixVisitor() {
29             @Override
30             public void visitPrefixStart(CldrPath prefix, Context context) {
31                 if (prefix.getName().endsWith("likelySubtags")) {
32                     context.install(cldrValue -> handleLikelySubtag(cldrValue));
33                 } else if (prefix.getName().endsWith("parentLocales")) {
34                     context.install(cldrValue -> handleParentLocale(cldrValue));
35                 }
36             }
37         });
38 
39         generateCFile(cFileOut);
40         generateJavaFile(javaFileOut);
41     }
42 
handleLikelySubtag(CldrValue value)43     private void handleLikelySubtag(CldrValue value) {
44         String from = value.get(AttributeKey.keyOf("likelySubtag", "from"));
45         String to = value.get(AttributeKey.keyOf("likelySubtag", "to"));
46 
47         String[] fromPieces = localeIDSplitter.splitToList(from).toArray(new String[] {});
48         String[] toPieces = localeIDSplitter.splitToList(to).toArray(new String[] {});
49 
50         if (toPieces.length != 3) {
51             throw new IllegalArgumentException("Didn't get 3 segments in 'to' value: from=" + from + ", to=" + to);
52         }
53         if (fromPieces[0].equals("und")) {
54             // ignore "und" entries-- they don't yield useful default-script information
55             return;
56         }
57         if (fromPieces.length >= 3) {
58             throw new IllegalArgumentException("'from' entry has a non-'und' language and also has a script code: from=" + from + ", to=" + to);
59         }
60         if (fromPieces.length == 2 && fromPieces[1].length() > 3) {
61             // the locale ID consists of just a language and a script-- the script code is redundant and doesn't
62             // supply any default-script info
63             return;
64         }
65 
66         String defaultScript = toPieces[1]; // toPieces is always three elements, so the second one is always the script
67         if (!defaultScript.equals("Latn")) {
68             // to save room, don't include all the entries where the default script is Latn
69             defaultScripts.put(from, defaultScript);
70         }
71     }
72 
handleParentLocale(CldrValue value)73     private void handleParentLocale(CldrValue value) {
74         String parent = value.get(AttributeKey.keyOf("parentLocale", "parent"));
75         String childrenStr = value.get(AttributeKey.keyOf("parentLocale", "locales"));
76 
77         for (String child : childLocaleSplitter.split(childrenStr)) {
78             parentLocales.put(child, parent);
79         }
80     }
81 
generateCFile(PrintWriter out)82     private void generateCFile(PrintWriter out) {
83         out.println("// © 2022 and later: Unicode, Inc. and others.");
84         out.println("// License & terms of use: http://www.unicode.org/copyright.html");
85         out.println("//");
86         out.println("// Internal static data tables used by uresbund.cpp");
87         out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool");
88         out.println("// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourcFallbackCodeGenerator.java).");
89         out.println("// DO NOT HAND EDIT!!!");
90         out.println();
91         out.println("#ifdef INCLUDED_FROM_URESBUND_CPP");
92         out.println();
93 
94         out.println("//======================================================================");
95         out.println("// Default script table");
96         Map<String, Integer> scriptIndex = buildCompositeString(defaultScripts.values(), "scriptCodeChars", out);
97         Map<String, Integer> localeIDIndex = buildCompositeString(defaultScripts.keySet(), "dsLocaleIDChars", out);
98         writeStringToStringIndex(defaultScripts, localeIDIndex, scriptIndex, "defaultScriptTable", out);
99 
100         out.println("//======================================================================");
101         out.println("// Parent locale table");
102         TreeSet<String> combinedLocaleIDs = new TreeSet<>();
103         combinedLocaleIDs.addAll(parentLocales.keySet());
104         combinedLocaleIDs.addAll(parentLocales.values());
105         localeIDIndex = buildCompositeString(combinedLocaleIDs, "parentLocaleChars", out);
106         writeStringToStringIndex(parentLocales, localeIDIndex, localeIDIndex, "parentLocaleTable", out);
107 
108         out.println();
109         out.println("#endif  // INCLUDED_FROM_URESBUND_CPP");
110     }
111 
buildCompositeString(Collection<String> strings, String variableName, PrintWriter out)112     private Map<String, Integer> buildCompositeString(Collection<String> strings, String variableName, PrintWriter out) {
113         Map<String, Integer> stringIndex = new TreeMap<>();
114         for (String string : strings) {
115             stringIndex.putIfAbsent(string, 0);
116         }
117         out.println("const char " + variableName + "[] =");
118         out.print("    \"");
119         int nextStringOffset = 0;
120         int charsOnLine = 0;
121         for (String string : stringIndex.keySet()) {
122             out.print(string);
123             out.print("\\0");
124             stringIndex.put(string, nextStringOffset);
125             nextStringOffset += string.length() + 1;
126             charsOnLine += string.length() + 2;
127 
128             if (charsOnLine > 60) {
129                 out.println("\"");
130                 out.print("    \"");
131                 charsOnLine = 0;
132             }
133         }
134         out.println("\";");
135         out.println();
136         return stringIndex;
137     }
138 
writeStringToStringIndex(Map<String, String> index, Map<String, Integer> keyIndex, Map<String, Integer> valueIndex, String variableName, PrintWriter out)139     private void writeStringToStringIndex(Map<String, String> index, Map<String, Integer> keyIndex, Map<String, Integer> valueIndex, String variableName, PrintWriter out) {
140         out.println("const int32_t " + variableName + "[] = {");
141         for (Map.Entry<String, String> entry : index.entrySet()) {
142             String key = entry.getKey();
143             String value = entry.getValue();
144             out.println("    " + keyIndex.get(key) + ", " + valueIndex.get(value) + ",  // " + key + " -> " + value);
145         }
146         out.println("};");
147         out.println();
148     }
149 
generateJavaFile(PrintWriter out)150     private void generateJavaFile(PrintWriter out) {
151         out.println("// © 2022 and later: Unicode, Inc. and others.");
152         out.println("// License & terms of use: http://www.unicode.org/copyright.html");
153         out.println("//");
154         out.println("// Internal static data tables used by ICUResourceBundle.java");
155         out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool");
156         out.println("// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourcFallbackCodeGenerator.java).");
157         out.println("// DO NOT HAND EDIT!!!");
158         out.println();
159 
160         out.println("package com.ibm.icu.impl;");
161         out.println();
162 		out.println("import java.util.Collections;");
163 		out.println("import java.util.HashMap;");
164 		out.println("import java.util.Map;");
165         out.println();
166         out.println("class LocaleFallbackData {");
167 
168         out.println("    //======================================================================");
169         out.println("    // Default script table");
170 		out.println("    public static final Map<String, String> DEFAULT_SCRIPT_TABLE = buildDefaultScriptTable();");
171 		out.println();
172 		out.println("    private static Map<String, String> buildDefaultScriptTable() {");
173 		out.println("        Map<String, String> t = new HashMap<>();");
174         for (Map.Entry<String, String> entry : defaultScripts.entrySet()) {
175             out.println("        t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");");
176         }
177 		out.println("        return Collections.unmodifiableMap(t);");
178         out.println("    }");
179         out.println();
180 
181         out.println("    //======================================================================");
182         out.println("    // Parent locale table");
183 		out.println("    public static final Map<String, String> PARENT_LOCALE_TABLE = buildParentLocaleTable();");
184 		out.println();
185 		out.println("    private static Map<String, String> buildParentLocaleTable() {");
186 		out.println("        Map<String, String> t = new HashMap<>();");
187         for (Map.Entry<String, String> entry : parentLocales.entrySet()) {
188             out.println("        t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");");
189         }
190 		out.println("        return Collections.unmodifiableMap(t);");
191         out.println("    }");
192         out.println("}");
193     }
194 }
195