1 package main.java.org.unicode.icu.tool.cldrtoicu.generator; 2 3 import com.google.common.base.Splitter; 4 import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator; 5 import org.unicode.cldr.api.*; 6 7 import java.io.PrintWriter; 8 import java.nio.file.Path; 9 import java.util.*; 10 11 import static com.google.common.base.CharMatcher.whitespace; 12 13 public class ResourceFallbackCodeGenerator implements CodeGenerator { 14 private Map<String, String> defaultScripts; 15 private Map<String, String> parentLocales; 16 private Splitter localeIDSplitter; 17 private Splitter childLocaleSplitter; 18 19 @Override generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut)20 public void generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut) { 21 defaultScripts = new TreeMap<String, String>(); 22 parentLocales = new TreeMap<String, String>(); 23 localeIDSplitter = Splitter.on('_'); 24 childLocaleSplitter = Splitter.on(whitespace()).omitEmptyStrings(); 25 26 CldrDataSupplier supplier = CldrDataSupplier.forCldrFilesIn(cldrPath); 27 CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL); 28 supplementalData.accept(CldrData.PathOrder.NESTED_GROUPING, new CldrData.PrefixVisitor() { 29 @Override 30 public void visitPrefixStart(CldrPath prefix, Context context) { 31 if (prefix.getName().endsWith("likelySubtags")) { 32 context.install(cldrValue -> handleLikelySubtag(cldrValue)); 33 } else if (prefix.getName().endsWith("parentLocales")) { 34 context.install(cldrValue -> handleParentLocale(cldrValue)); 35 } 36 } 37 }); 38 39 generateCFile(cFileOut); 40 generateJavaFile(javaFileOut); 41 } 42 handleLikelySubtag(CldrValue value)43 private void handleLikelySubtag(CldrValue value) { 44 String from = value.get(AttributeKey.keyOf("likelySubtag", "from")); 45 String to = value.get(AttributeKey.keyOf("likelySubtag", "to")); 46 47 String[] fromPieces = localeIDSplitter.splitToList(from).toArray(new String[] {}); 48 String[] toPieces = localeIDSplitter.splitToList(to).toArray(new String[] {}); 49 50 if (toPieces.length != 3) { 51 throw new IllegalArgumentException("Didn't get 3 segments in 'to' value: from=" + from + ", to=" + to); 52 } 53 if (fromPieces[0].equals("und")) { 54 // ignore "und" entries-- they don't yield useful default-script information 55 return; 56 } 57 if (fromPieces.length >= 3) { 58 throw new IllegalArgumentException("'from' entry has a non-'und' language and also has a script code: from=" + from + ", to=" + to); 59 } 60 if (fromPieces.length == 2 && fromPieces[1].length() > 3) { 61 // the locale ID consists of just a language and a script-- the script code is redundant and doesn't 62 // supply any default-script info 63 return; 64 } 65 66 String defaultScript = toPieces[1]; // toPieces is always three elements, so the second one is always the script 67 if (!defaultScript.equals("Latn")) { 68 // to save room, don't include all the entries where the default script is Latn 69 defaultScripts.put(from, defaultScript); 70 } 71 } 72 handleParentLocale(CldrValue value)73 private void handleParentLocale(CldrValue value) { 74 String parent = value.get(AttributeKey.keyOf("parentLocale", "parent")); 75 String childrenStr = value.get(AttributeKey.keyOf("parentLocale", "locales")); 76 77 for (String child : childLocaleSplitter.split(childrenStr)) { 78 parentLocales.put(child, parent); 79 } 80 } 81 generateCFile(PrintWriter out)82 private void generateCFile(PrintWriter out) { 83 out.println("// © 2022 and later: Unicode, Inc. and others."); 84 out.println("// License & terms of use: http://www.unicode.org/copyright.html"); 85 out.println("//"); 86 out.println("// Internal static data tables used by uresbund.cpp"); 87 out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool"); 88 out.println("// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourcFallbackCodeGenerator.java)."); 89 out.println("// DO NOT HAND EDIT!!!"); 90 out.println(); 91 out.println("#ifdef INCLUDED_FROM_URESBUND_CPP"); 92 out.println(); 93 94 out.println("//======================================================================"); 95 out.println("// Default script table"); 96 Map<String, Integer> scriptIndex = buildCompositeString(defaultScripts.values(), "scriptCodeChars", out); 97 Map<String, Integer> localeIDIndex = buildCompositeString(defaultScripts.keySet(), "dsLocaleIDChars", out); 98 writeStringToStringIndex(defaultScripts, localeIDIndex, scriptIndex, "defaultScriptTable", out); 99 100 out.println("//======================================================================"); 101 out.println("// Parent locale table"); 102 TreeSet<String> combinedLocaleIDs = new TreeSet<>(); 103 combinedLocaleIDs.addAll(parentLocales.keySet()); 104 combinedLocaleIDs.addAll(parentLocales.values()); 105 localeIDIndex = buildCompositeString(combinedLocaleIDs, "parentLocaleChars", out); 106 writeStringToStringIndex(parentLocales, localeIDIndex, localeIDIndex, "parentLocaleTable", out); 107 108 out.println(); 109 out.println("#endif // INCLUDED_FROM_URESBUND_CPP"); 110 } 111 buildCompositeString(Collection<String> strings, String variableName, PrintWriter out)112 private Map<String, Integer> buildCompositeString(Collection<String> strings, String variableName, PrintWriter out) { 113 Map<String, Integer> stringIndex = new TreeMap<>(); 114 for (String string : strings) { 115 stringIndex.putIfAbsent(string, 0); 116 } 117 out.println("const char " + variableName + "[] ="); 118 out.print(" \""); 119 int nextStringOffset = 0; 120 int charsOnLine = 0; 121 for (String string : stringIndex.keySet()) { 122 out.print(string); 123 out.print("\\0"); 124 stringIndex.put(string, nextStringOffset); 125 nextStringOffset += string.length() + 1; 126 charsOnLine += string.length() + 2; 127 128 if (charsOnLine > 60) { 129 out.println("\""); 130 out.print(" \""); 131 charsOnLine = 0; 132 } 133 } 134 out.println("\";"); 135 out.println(); 136 return stringIndex; 137 } 138 writeStringToStringIndex(Map<String, String> index, Map<String, Integer> keyIndex, Map<String, Integer> valueIndex, String variableName, PrintWriter out)139 private void writeStringToStringIndex(Map<String, String> index, Map<String, Integer> keyIndex, Map<String, Integer> valueIndex, String variableName, PrintWriter out) { 140 out.println("const int32_t " + variableName + "[] = {"); 141 for (Map.Entry<String, String> entry : index.entrySet()) { 142 String key = entry.getKey(); 143 String value = entry.getValue(); 144 out.println(" " + keyIndex.get(key) + ", " + valueIndex.get(value) + ", // " + key + " -> " + value); 145 } 146 out.println("};"); 147 out.println(); 148 } 149 generateJavaFile(PrintWriter out)150 private void generateJavaFile(PrintWriter out) { 151 out.println("// © 2022 and later: Unicode, Inc. and others."); 152 out.println("// License & terms of use: http://www.unicode.org/copyright.html"); 153 out.println("//"); 154 out.println("// Internal static data tables used by ICUResourceBundle.java"); 155 out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool"); 156 out.println("// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourcFallbackCodeGenerator.java)."); 157 out.println("// DO NOT HAND EDIT!!!"); 158 out.println(); 159 160 out.println("package com.ibm.icu.impl;"); 161 out.println(); 162 out.println("import java.util.Collections;"); 163 out.println("import java.util.HashMap;"); 164 out.println("import java.util.Map;"); 165 out.println(); 166 out.println("class LocaleFallbackData {"); 167 168 out.println(" //======================================================================"); 169 out.println(" // Default script table"); 170 out.println(" public static final Map<String, String> DEFAULT_SCRIPT_TABLE = buildDefaultScriptTable();"); 171 out.println(); 172 out.println(" private static Map<String, String> buildDefaultScriptTable() {"); 173 out.println(" Map<String, String> t = new HashMap<>();"); 174 for (Map.Entry<String, String> entry : defaultScripts.entrySet()) { 175 out.println(" t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");"); 176 } 177 out.println(" return Collections.unmodifiableMap(t);"); 178 out.println(" }"); 179 out.println(); 180 181 out.println(" //======================================================================"); 182 out.println(" // Parent locale table"); 183 out.println(" public static final Map<String, String> PARENT_LOCALE_TABLE = buildParentLocaleTable();"); 184 out.println(); 185 out.println(" private static Map<String, String> buildParentLocaleTable() {"); 186 out.println(" Map<String, String> t = new HashMap<>();"); 187 for (Map.Entry<String, String> entry : parentLocales.entrySet()) { 188 out.println(" t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");"); 189 } 190 out.println(" return Collections.unmodifiableMap(t);"); 191 out.println(" }"); 192 out.println("}"); 193 } 194 } 195